1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_status.h>
90 #include <mach/time_value.h>
91 #include <mach/vm_param.h>
92
93 #include <machine/thread.h>
94 #include <machine/pal_routines.h>
95 #include <machine/limits.h>
96
97 #include <kern/kern_types.h>
98 #include <kern/kalloc.h>
99 #include <kern/cpu_data.h>
100 #include <kern/counters.h>
101 #include <kern/extmod_statistics.h>
102 #include <kern/ipc_mig.h>
103 #include <kern/ipc_tt.h>
104 #include <kern/mach_param.h>
105 #include <kern/machine.h>
106 #include <kern/misc_protos.h>
107 #include <kern/processor.h>
108 #include <kern/queue.h>
109 #include <kern/sched.h>
110 #include <kern/sched_prim.h>
111 #include <kern/sync_lock.h>
112 #include <kern/syscall_subr.h>
113 #include <kern/task.h>
114 #include <kern/thread.h>
115 #include <kern/thread_group.h>
116 #include <kern/coalition.h>
117 #include <kern/host.h>
118 #include <kern/zalloc.h>
119 #include <kern/assert.h>
120 #include <kern/exc_resource.h>
121 #include <kern/exc_guard.h>
122 #include <kern/telemetry.h>
123 #include <kern/policy_internal.h>
124 #include <kern/turnstile.h>
125
126 #include <corpses/task_corpse.h>
127 #if KPC
128 #include <kern/kpc.h>
129 #endif
130
131 #if MONOTONIC
132 #include <kern/monotonic.h>
133 #include <machine/monotonic.h>
134 #endif /* MONOTONIC */
135
136 #include <ipc/ipc_kmsg.h>
137 #include <ipc/ipc_port.h>
138 #include <bank/bank_types.h>
139
140 #include <vm/vm_kern.h>
141 #include <vm/vm_pageout.h>
142
143 #include <sys/kdebug.h>
144 #include <sys/bsdtask_info.h>
145 #include <mach/sdt.h>
146 #include <san/kasan.h>
147
148 #include <stdatomic.h>
149
150 /*
151 * Exported interfaces
152 */
153 #include <mach/task_server.h>
154 #include <mach/thread_act_server.h>
155 #include <mach/mach_host_server.h>
156 #include <mach/host_priv_server.h>
157 #include <mach/mach_voucher_server.h>
158 #include <kern/policy_internal.h>
159
160 static struct zone *thread_zone;
161 static lck_grp_attr_t thread_lck_grp_attr;
162 lck_attr_t thread_lck_attr;
163 lck_grp_t thread_lck_grp;
164
165 struct zone *thread_qos_override_zone;
166
167 decl_simple_lock_data(static,thread_stack_lock)
168 static queue_head_t thread_stack_queue;
169
170 decl_simple_lock_data(static,thread_terminate_lock)
171 static queue_head_t thread_terminate_queue;
172
173 static queue_head_t thread_deallocate_queue;
174
175 static queue_head_t turnstile_deallocate_queue;
176
177 static queue_head_t crashed_threads_queue;
178
179 static queue_head_t workq_deallocate_queue;
180
181 decl_simple_lock_data(static,thread_exception_lock)
182 static queue_head_t thread_exception_queue;
183
184 struct thread_exception_elt {
185 queue_chain_t elt;
186 exception_type_t exception_type;
187 task_t exception_task;
188 thread_t exception_thread;
189 };
190
191 static struct thread thread_template, init_thread;
192 static void thread_deallocate_enqueue(thread_t thread);
193 static void thread_deallocate_complete(thread_t thread);
194
195 #ifdef MACH_BSD
196 extern void proc_exit(void *);
197 extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
198 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
199 extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
200 extern int proc_selfpid(void);
201 extern void proc_name(int, char*, int);
202 extern char * proc_name_address(void *p);
203 #endif /* MACH_BSD */
204
205 extern int disable_exc_resource;
206 extern int audio_active;
207 extern int debug_task;
208 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
209 int task_threadmax = CONFIG_THREAD_MAX;
210
211 static uint64_t thread_unique_id = 100;
212
213 struct _thread_ledger_indices thread_ledgers = { -1 };
214 static ledger_template_t thread_ledger_template = NULL;
215 static void init_thread_ledgers(void);
216
217 #if CONFIG_JETSAM
218 void jetsam_on_ledger_cpulimit_exceeded(void);
219 #endif
220
221 extern int task_thread_soft_limit;
222 extern int exc_via_corpse_forking;
223
224 #if DEVELOPMENT || DEBUG
225 extern int exc_resource_threads_enabled;
226 #endif /* DEVELOPMENT || DEBUG */
227
228 /*
229 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
230 *
231  * (i.e., when any thread's CPU consumption exceeds 70% of the limit, start taking user
232  * stack traces, a.k.a. micro-stackshots)
233 */
234 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
235
236 int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
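/*
 * Worked example (illustrative only): with a per-thread CPU limit of 50%
 * over a 1-second interval and the default trigger of 70%, micro-stackshot
 * telemetry begins once the thread has consumed 0.70 * 0.50 * 1s = 350ms of
 * CPU time within that interval. The trigger percentage can be overridden
 * with the "cpumon_ustackshots_trigger_pct" boot-arg parsed in thread_init()
 * below.
 */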
237 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
238 #if DEVELOPMENT || DEBUG
239 void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
240 #endif /* DEVELOPMENT || DEBUG */
241
242 /*
243 * The smallest interval over which we support limiting CPU consumption is 1ms
244 */
245 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
246
247 os_refgrp_decl(static, thread_refgrp, "thread", NULL);
248
249 void
250 thread_bootstrap(void)
251 {
252 /*
253 * Fill in a template thread for fast initialization.
254 */
255
256 #if MACH_ASSERT
257 thread_template.thread_magic = THREAD_MAGIC;
258 #endif /* MACH_ASSERT */
259
260 thread_template.runq = PROCESSOR_NULL;
261
262 thread_template.reason = AST_NONE;
263 thread_template.at_safe_point = FALSE;
264 thread_template.wait_event = NO_EVENT64;
265 thread_template.waitq = NULL;
266 thread_template.wait_result = THREAD_WAITING;
267 thread_template.options = THREAD_ABORTSAFE;
268 thread_template.state = TH_WAIT | TH_UNINT;
269 thread_template.wake_active = FALSE;
270 thread_template.continuation = THREAD_CONTINUE_NULL;
271 thread_template.parameter = NULL;
272
273 thread_template.importance = 0;
274 thread_template.sched_mode = TH_MODE_NONE;
275 thread_template.sched_flags = 0;
276 thread_template.saved_mode = TH_MODE_NONE;
277 thread_template.safe_release = 0;
278 thread_template.th_sched_bucket = TH_BUCKET_RUN;
279
280 thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
281 thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;
282
283 thread_template.active = 0;
284 thread_template.started = 0;
285 thread_template.static_param = 0;
286 thread_template.policy_reset = 0;
287
288 thread_template.base_pri = BASEPRI_DEFAULT;
289 thread_template.sched_pri = 0;
290 thread_template.max_priority = 0;
291 thread_template.task_priority = 0;
292 thread_template.promotions = 0;
293 thread_template.rwlock_count = 0;
294 thread_template.waiting_for_mutex = NULL;
295
296
297 thread_template.realtime.deadline = UINT64_MAX;
298
299 thread_template.quantum_remaining = 0;
300 thread_template.last_run_time = 0;
301 thread_template.last_made_runnable_time = THREAD_NOT_RUNNABLE;
302 thread_template.last_basepri_change_time = THREAD_NOT_RUNNABLE;
303 thread_template.same_pri_latency = 0;
304
305 thread_template.computation_metered = 0;
306 thread_template.computation_epoch = 0;
307
308 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
309 thread_template.sched_stamp = 0;
310 thread_template.pri_shift = INT8_MAX;
311 thread_template.sched_usage = 0;
312 thread_template.cpu_usage = thread_template.cpu_delta = 0;
313 #endif
314 thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
315
316 #if MONOTONIC
317 memset(&thread_template.t_monotonic, 0,
318 sizeof(thread_template.t_monotonic));
319 #endif /* MONOTONIC */
320
321 thread_template.bound_processor = PROCESSOR_NULL;
322 thread_template.last_processor = PROCESSOR_NULL;
323
324 thread_template.sched_call = NULL;
325
326 timer_init(&thread_template.user_timer);
327 timer_init(&thread_template.system_timer);
328 timer_init(&thread_template.ptime);
329 timer_init(&thread_template.runnable_timer);
330 thread_template.user_timer_save = 0;
331 thread_template.system_timer_save = 0;
332 thread_template.vtimer_user_save = 0;
333 thread_template.vtimer_prof_save = 0;
334 thread_template.vtimer_rlim_save = 0;
335 thread_template.vtimer_qos_save = 0;
336
337 #if CONFIG_SCHED_SFI
338 thread_template.wait_sfi_begin_time = 0;
339 #endif
340
341 thread_template.wait_timer_is_set = FALSE;
342 thread_template.wait_timer_active = 0;
343
344 thread_template.depress_timer_active = 0;
345
346 thread_template.recover = (vm_offset_t)NULL;
347
348 thread_template.map = VM_MAP_NULL;
349 #if DEVELOPMENT || DEBUG
350 thread_template.pmap_footprint_suspended = FALSE;
351 #endif /* DEVELOPMENT || DEBUG */
352
353 #if CONFIG_DTRACE
354 thread_template.t_dtrace_predcache = 0;
355 thread_template.t_dtrace_vtime = 0;
356 thread_template.t_dtrace_tracing = 0;
357 #endif /* CONFIG_DTRACE */
358
359 #if KPERF
360 thread_template.kperf_flags = 0;
361 thread_template.kperf_pet_gen = 0;
362 thread_template.kperf_c_switch = 0;
363 thread_template.kperf_pet_cnt = 0;
364 #endif
365
366 #if KPC
367 thread_template.kpc_buf = NULL;
368 #endif
369
370 #if HYPERVISOR
371 thread_template.hv_thread_target = NULL;
372 #endif /* HYPERVISOR */
373
374 #if (DEVELOPMENT || DEBUG)
375 thread_template.t_page_creation_throttled_hard = 0;
376 thread_template.t_page_creation_throttled_soft = 0;
377 #endif /* DEVELOPMENT || DEBUG */
378 thread_template.t_page_creation_throttled = 0;
379 thread_template.t_page_creation_count = 0;
380 thread_template.t_page_creation_time = 0;
381
382 thread_template.affinity_set = NULL;
383
384 thread_template.syscalls_unix = 0;
385 thread_template.syscalls_mach = 0;
386
387 thread_template.t_ledger = LEDGER_NULL;
388 thread_template.t_threadledger = LEDGER_NULL;
389 thread_template.t_bankledger = LEDGER_NULL;
390 thread_template.t_deduct_bank_ledger_time = 0;
391
392 thread_template.requested_policy = (struct thread_requested_policy) {};
393 thread_template.effective_policy = (struct thread_effective_policy) {};
394
395 bzero(&thread_template.overrides, sizeof(thread_template.overrides));
396 thread_template.sync_ipc_overrides = 0;
397
398 thread_template.iotier_override = THROTTLE_LEVEL_NONE;
399 thread_template.thread_io_stats = NULL;
400 #if CONFIG_EMBEDDED
401 thread_template.taskwatch = NULL;
402 #endif /* CONFIG_EMBEDDED */
403 thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;
404
405 thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
406 thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;
407
408 thread_template.thread_tag = 0;
409
410 thread_template.ith_voucher_name = MACH_PORT_NULL;
411 thread_template.ith_voucher = IPC_VOUCHER_NULL;
412
413 thread_template.th_work_interval = NULL;
414
415 init_thread = thread_template;
416
417 machine_set_current_thread(&init_thread);
418 }
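/*
 * Note on the template above: thread_create_internal() initializes each new
 * thread by struct-copying thread_template ("*new_thread = thread_template;"),
 * so any field added to struct thread should be given a sane default in
 * thread_bootstrap(). init_thread is a copy of the same template, installed
 * via machine_set_current_thread() to stand in as the bootstrap processor's
 * current thread until the real one is set up.
 */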
419
420 extern boolean_t allow_qos_policy_set;
421
422 void
423 thread_init(void)
424 {
425 thread_zone = zinit(
426 sizeof(struct thread),
427 thread_max * sizeof(struct thread),
428 THREAD_CHUNK * sizeof(struct thread),
429 "threads");
430
431 thread_qos_override_zone = zinit(
432 sizeof(struct thread_qos_override),
433 4 * thread_max * sizeof(struct thread_qos_override),
434 PAGE_SIZE,
435 "thread qos override");
436 zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
437 zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
438 zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
439 zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);
440
441 lck_grp_attr_setdefault(&thread_lck_grp_attr);
442 lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
443 lck_attr_setdefault(&thread_lck_attr);
444
445 stack_init();
446
447 thread_policy_init();
448
449 /*
450 * Initialize any machine-dependent
451 * per-thread structures necessary.
452 */
453 machine_thread_init();
454
455 if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
456 sizeof (cpumon_ustackshots_trigger_pct))) {
457 cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
458 }
459
460 PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));
461
462 init_thread_ledgers();
463 }
464
465 boolean_t
466 thread_is_active(thread_t thread)
467 {
468 return (thread->active);
469 }
470
471 void
472 thread_corpse_continue(void)
473 {
474 thread_t thread = current_thread();
475
476 thread_terminate_internal(thread);
477
478 /*
479 * Handle the thread termination directly
480 * here instead of returning to userspace.
481 */
482 assert(thread->active == FALSE);
483 thread_ast_clear(thread, AST_APC);
484 thread_apc_ast(thread);
485
486 panic("thread_corpse_continue");
487 /*NOTREACHED*/
488 }
489
490 static void
491 thread_terminate_continue(void)
492 {
493 panic("thread_terminate_continue");
494 /*NOTREACHED*/
495 }
496
497 /*
498 * thread_terminate_self:
499 */
500 void
501 thread_terminate_self(void)
502 {
503 thread_t thread = current_thread();
504 task_t task;
505 int threadcnt;
506
507 pal_thread_terminate_self(thread);
508
509 DTRACE_PROC(lwp__exit);
510
511 thread_mtx_lock(thread);
512
513 ipc_thread_disable(thread);
514
515 thread_mtx_unlock(thread);
516
517 thread_sched_call(thread, NULL);
518
519 spl_t s = splsched();
520 thread_lock(thread);
521
522 thread_depress_abort_locked(thread);
523
524 thread_unlock(thread);
525 splx(s);
526
527 #if CONFIG_EMBEDDED
528 thead_remove_taskwatch(thread);
529 #endif /* CONFIG_EMBEDDED */
530
531 work_interval_thread_terminate(thread);
532
533 thread_mtx_lock(thread);
534
535 thread_policy_reset(thread);
536
537 thread_mtx_unlock(thread);
538
539 bank_swap_thread_bank_ledger(thread, NULL);
540
541 if (kdebug_enable && bsd_hasthreadname(thread->uthread)) {
542 char threadname[MAXTHREADNAMESIZE];
543 bsd_getthreadname(thread->uthread, threadname);
544 kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
545 }
546
547 task = thread->task;
548 uthread_cleanup(task, thread->uthread, task->bsd_info);
549
550 if (kdebug_enable && task->bsd_info && !task_is_exec_copy(task)) {
551 /* trace out pid before we sign off */
552 long dbg_arg1 = 0;
553 long dbg_arg2 = 0;
554
555 kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
556 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
557 }
558
559 /*
560 * After this subtraction, this thread should never access
561 * task->bsd_info unless it got 0 back from the hw_atomic_sub. It
562 * could be racing with other threads to be the last thread in the
563 * process, and the last thread in the process will tear down the proc
564 * structure and zero-out task->bsd_info.
565 */
566 threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
567
568 /*
569 * If we are the last thread to terminate and the task is
570 * associated with a BSD process, perform BSD process exit.
571 */
572 if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
573 mach_exception_data_type_t subcode = 0;
574 if (kdebug_enable) {
575 /* since we're the last thread in this process, trace out the command name too */
576 long args[4] = {};
577 kdbg_trace_string(thread->task->bsd_info, &args[0], &args[1], &args[2], &args[3]);
578 KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
579 }
580
581 /* Get the exit reason before proc_exit */
582 subcode = proc_encode_exit_exception_code(task->bsd_info);
583 proc_exit(task->bsd_info);
584 /*
585                  * If there is crash info in the task,
586                  * deliver the crash notification, since this is
587                  * the last thread for this task.
588 */
589 if (task->corpse_info) {
590 task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
591 }
592 }
593
594 if (threadcnt == 0) {
595 task_lock(task);
596 if (task_is_a_corpse_fork(task)) {
597 thread_wakeup((event_t)&task->active_thread_count);
598 }
599 task_unlock(task);
600 }
601
602 uthread_cred_free(thread->uthread);
603
604 s = splsched();
605 thread_lock(thread);
606
607 /*
608 * Ensure that the depress timer is no longer enqueued,
609 * so the timer (stored in the thread) can be safely deallocated
610 *
611 * TODO: build timer_call_cancel_wait
612 */
613
614 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);
615
616 uint32_t delay_us = 1;
617
618 while (thread->depress_timer_active > 0) {
619 thread_unlock(thread);
620 splx(s);
621
622 delay(delay_us++);
623
624 if (delay_us > USEC_PER_SEC)
625 panic("depress timer failed to inactivate!"
626 "thread: %p depress_timer_active: %d",
627 thread, thread->depress_timer_active);
628
629 s = splsched();
630 thread_lock(thread);
631 }
632
633 /*
634 * Cancel wait timer, and wait for
635 * concurrent expirations.
636 */
637 if (thread->wait_timer_is_set) {
638 thread->wait_timer_is_set = FALSE;
639
640 if (timer_call_cancel(&thread->wait_timer))
641 thread->wait_timer_active--;
642 }
643
644 delay_us = 1;
645
646 while (thread->wait_timer_active > 0) {
647 thread_unlock(thread);
648 splx(s);
649
650 delay(delay_us++);
651
652 if (delay_us > USEC_PER_SEC)
653 panic("wait timer failed to inactivate!"
654 "thread: %p wait_timer_active: %d",
655 thread, thread->wait_timer_active);
656
657 s = splsched();
658 thread_lock(thread);
659 }
660
661 /*
662 * If there is a reserved stack, release it.
663 */
664 if (thread->reserved_stack != 0) {
665 stack_free_reserved(thread);
666 thread->reserved_stack = 0;
667 }
668
669 /*
670 * Mark thread as terminating, and block.
671 */
672 thread->state |= TH_TERMINATE;
673 thread_mark_wait_locked(thread, THREAD_UNINT);
674
675 assert((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) == 0);
676 assert((thread->sched_flags & TH_SFLAG_RW_PROMOTED) == 0);
677 assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);
678 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
679 assert(thread->promotions == 0);
680 assert(thread->was_promoted_on_wakeup == 0);
681 assert(thread->waiting_for_mutex == NULL);
682 assert(thread->rwlock_count == 0);
683
684 thread_unlock(thread);
685 /* splsched */
686
687 thread_block((thread_continue_t)thread_terminate_continue);
688 /*NOTREACHED*/
689 }
690
691 static bool
692 thread_ref_release(thread_t thread)
693 {
694 if (thread == THREAD_NULL) {
695 return false;
696 }
697
698 assert_thread_magic(thread);
699
700 return os_ref_release(&thread->ref_count) == 0;
701 }
702
703 /* Drop a thread refcount safely without triggering a zfree */
704 void
705 thread_deallocate_safe(thread_t thread)
706 {
707 if (__improbable(thread_ref_release(thread))) {
708                 /* enqueue the thread for the thread deallocate daemon to call thread_deallocate_complete */
709 thread_deallocate_enqueue(thread);
710 }
711 }
712
713 void
714 thread_deallocate(thread_t thread)
715 {
716 if (__improbable(thread_ref_release(thread))) {
717 thread_deallocate_complete(thread);
718 }
719 }
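/*
 * Usage note (a sketch of the intended split, based on the code above):
 * contexts that must not call into zalloc/zfree or block -- e.g. code running
 * at splsched or while holding simple locks -- drop their reference with
 * thread_deallocate_safe(), which defers the final teardown to the terminate
 * daemon via thread_deallocate_enqueue(); ordinary contexts call
 * thread_deallocate(), which may perform thread_deallocate_complete() inline
 * when the last reference goes away.
 */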
720
721 void
722 thread_deallocate_complete(
723 thread_t thread)
724 {
725 task_t task;
726
727 assert_thread_magic(thread);
728
729 assert(os_ref_get_count(&thread->ref_count) == 0);
730
731 assert(thread_owned_workloops_count(thread) == 0);
732
733 if (!(thread->state & TH_TERMINATE2))
734 panic("thread_deallocate: thread not properly terminated\n");
735
736 assert(thread->runq == PROCESSOR_NULL);
737
738 #if KPC
739 kpc_thread_destroy(thread);
740 #endif
741
742 ipc_thread_terminate(thread);
743
744 proc_thread_qos_deallocate(thread);
745
746 task = thread->task;
747
748 #ifdef MACH_BSD
749 {
750 void *ut = thread->uthread;
751
752 thread->uthread = NULL;
753 uthread_zone_free(ut);
754 }
755 #endif /* MACH_BSD */
756
757 if (thread->t_ledger)
758 ledger_dereference(thread->t_ledger);
759 if (thread->t_threadledger)
760 ledger_dereference(thread->t_threadledger);
761
762 assert(thread->turnstile != TURNSTILE_NULL);
763 if (thread->turnstile)
764 turnstile_deallocate(thread->turnstile);
765
766 if (IPC_VOUCHER_NULL != thread->ith_voucher)
767 ipc_voucher_release(thread->ith_voucher);
768
769 if (thread->thread_io_stats)
770 kfree(thread->thread_io_stats, sizeof(struct io_stat_info));
771
772 if (thread->kernel_stack != 0)
773 stack_free(thread);
774
775 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
776 machine_thread_destroy(thread);
777
778 task_deallocate(task);
779
780 #if MACH_ASSERT
781 assert_thread_magic(thread);
782 thread->thread_magic = 0;
783 #endif /* MACH_ASSERT */
784
785 zfree(thread_zone, thread);
786 }
787
788 void
789 thread_starts_owning_workloop(thread_t thread)
790 {
791 atomic_fetch_add_explicit(&thread->kqwl_owning_count, 1,
792 memory_order_relaxed);
793 }
794
795 void
796 thread_ends_owning_workloop(thread_t thread)
797 {
798 __assert_only uint32_t count;
799 count = atomic_fetch_sub_explicit(&thread->kqwl_owning_count, 1,
800 memory_order_relaxed);
801 assert(count > 0);
802 }
803
804 uint32_t
805 thread_owned_workloops_count(thread_t thread)
806 {
807 return atomic_load_explicit(&thread->kqwl_owning_count,
808 memory_order_relaxed);
809 }
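/*
 * Note: thread_deallocate_complete() asserts that this count is zero, so
 * every thread_starts_owning_workloop() must be balanced by a matching
 * thread_ends_owning_workloop() before the thread's last reference is
 * dropped.
 */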
810
811 /*
812 * thread_inspect_deallocate:
813 *
814 * Drop a thread inspection reference.
815 */
816 void
817 thread_inspect_deallocate(
818 thread_inspect_t thread_inspect)
819 {
820 return(thread_deallocate((thread_t)thread_inspect));
821 }
822
823 /*
824 * thread_exception_daemon:
825 *
826 * Deliver EXC_{RESOURCE,GUARD} exception
827 */
828 static void
829 thread_exception_daemon(void)
830 {
831 struct thread_exception_elt *elt;
832 task_t task;
833 thread_t thread;
834 exception_type_t etype;
835
836 simple_lock(&thread_exception_lock);
837 while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
838 simple_unlock(&thread_exception_lock);
839
840 etype = elt->exception_type;
841 task = elt->exception_task;
842 thread = elt->exception_thread;
843 assert_thread_magic(thread);
844
845 kfree(elt, sizeof (*elt));
846
847 /* wait for all the threads in the task to terminate */
848 task_lock(task);
849 task_wait_till_threads_terminate_locked(task);
850 task_unlock(task);
851
852 /* Consumes the task ref returned by task_generate_corpse_internal */
853 task_deallocate(task);
854 /* Consumes the thread ref returned by task_generate_corpse_internal */
855 thread_deallocate(thread);
856
857 /* Deliver the notification, also clears the corpse. */
858 task_deliver_crash_notification(task, thread, etype, 0);
859
860 simple_lock(&thread_exception_lock);
861 }
862
863 assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
864 simple_unlock(&thread_exception_lock);
865
866 thread_block((thread_continue_t)thread_exception_daemon);
867 }
868
869 /*
870 * thread_exception_enqueue:
871 *
872 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
873 */
874 void
875 thread_exception_enqueue(
876 task_t task,
877 thread_t thread,
878 exception_type_t etype)
879 {
880 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
881 struct thread_exception_elt *elt = kalloc(sizeof (*elt));
882 elt->exception_type = etype;
883 elt->exception_task = task;
884 elt->exception_thread = thread;
885
886 simple_lock(&thread_exception_lock);
887 enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
888 simple_unlock(&thread_exception_lock);
889
890 thread_wakeup((event_t)&thread_exception_queue);
891 }
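/*
 * Note: the task and thread references passed to thread_exception_enqueue()
 * are the ones returned by task_generate_corpse_internal(); the exception
 * daemon above consumes them after waiting for the corpse task's threads to
 * terminate and before delivering the EXC_{RESOURCE,GUARD} notification.
 */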
892
893 /*
894 * thread_copy_resource_info
895 *
896 * Copy the resource info counters from source
897 * thread to destination thread.
898 */
899 void
900 thread_copy_resource_info(
901 thread_t dst_thread,
902 thread_t src_thread)
903 {
904 dst_thread->c_switch = src_thread->c_switch;
905 dst_thread->p_switch = src_thread->p_switch;
906 dst_thread->ps_switch = src_thread->ps_switch;
907 dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
908 dst_thread->user_timer = src_thread->user_timer;
909 dst_thread->user_timer_save = src_thread->user_timer_save;
910 dst_thread->system_timer = src_thread->system_timer;
911 dst_thread->system_timer_save = src_thread->system_timer_save;
912 dst_thread->runnable_timer = src_thread->runnable_timer;
913 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
914 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
915 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
916 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
917 dst_thread->syscalls_unix = src_thread->syscalls_unix;
918 dst_thread->syscalls_mach = src_thread->syscalls_mach;
919 ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
920 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
921 }
922
923 /*
924 * thread_terminate_daemon:
925 *
926 * Perform final clean up for terminating threads.
927 */
928 static void
929 thread_terminate_daemon(void)
930 {
931 thread_t self, thread;
932 task_t task;
933
934 self = current_thread();
935 self->options |= TH_OPT_SYSTEM_CRITICAL;
936
937 (void)splsched();
938 simple_lock(&thread_terminate_lock);
939
940 thread_terminate_start:
941 while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
942 assert_thread_magic(thread);
943
944 /*
945                  * If marked for crash reporting, skip reaping.
946                  * The corpse delivery thread will clear the bit and enqueue
947                  * the thread for reaping when done.
948 */
949 if (thread->inspection){
950 enqueue_tail(&crashed_threads_queue, &thread->runq_links);
951 continue;
952 }
953
954 simple_unlock(&thread_terminate_lock);
955 (void)spllo();
956
957 task = thread->task;
958
959 task_lock(task);
960 task->total_user_time += timer_grab(&thread->user_timer);
961 task->total_ptime += timer_grab(&thread->ptime);
962 task->total_runnable_time += timer_grab(&thread->runnable_timer);
963 if (thread->precise_user_kernel_time) {
964 task->total_system_time += timer_grab(&thread->system_timer);
965 } else {
966 task->total_user_time += timer_grab(&thread->system_timer);
967 }
968
969 task->c_switch += thread->c_switch;
970 task->p_switch += thread->p_switch;
971 task->ps_switch += thread->ps_switch;
972
973 task->syscalls_unix += thread->syscalls_unix;
974 task->syscalls_mach += thread->syscalls_mach;
975
976 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
977 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
978 task->task_gpu_ns += ml_gpu_stat(thread);
979 task->task_energy += ml_energy_stat(thread);
980
981 #if MONOTONIC
982 mt_terminate_update(task, thread);
983 #endif /* MONOTONIC */
984
985 thread_update_qos_cpu_time(thread);
986
987 queue_remove(&task->threads, thread, thread_t, task_threads);
988 task->thread_count--;
989
990 /*
991 * If the task is being halted, and there is only one thread
992 * left in the task after this one, then wakeup that thread.
993 */
994 if (task->thread_count == 1 && task->halting)
995 thread_wakeup((event_t)&task->halting);
996
997 task_unlock(task);
998
999 lck_mtx_lock(&tasks_threads_lock);
1000 queue_remove(&threads, thread, thread_t, threads);
1001 threads_count--;
1002 lck_mtx_unlock(&tasks_threads_lock);
1003
1004 thread_deallocate(thread);
1005
1006 (void)splsched();
1007 simple_lock(&thread_terminate_lock);
1008 }
1009
1010 while ((thread = qe_dequeue_head(&thread_deallocate_queue, struct thread, runq_links)) != THREAD_NULL) {
1011 assert_thread_magic(thread);
1012
1013 simple_unlock(&thread_terminate_lock);
1014 (void)spllo();
1015
1016 thread_deallocate_complete(thread);
1017
1018 (void)splsched();
1019 simple_lock(&thread_terminate_lock);
1020 }
1021
1022 struct turnstile *turnstile;
1023 while ((turnstile = qe_dequeue_head(&turnstile_deallocate_queue, struct turnstile, ts_deallocate_link)) != TURNSTILE_NULL) {
1024
1025 simple_unlock(&thread_terminate_lock);
1026 (void)spllo();
1027
1028 turnstile_destroy(turnstile);
1029
1030 (void)splsched();
1031 simple_lock(&thread_terminate_lock);
1032 }
1033
1034 queue_entry_t qe;
1035
1036 /*
1037 * see workq_deallocate_enqueue: struct workqueue is opaque to thread.c and
1038 * we just link pieces of memory here
1039 */
1040 while ((qe = dequeue_head(&workq_deallocate_queue))) {
1041 simple_unlock(&thread_terminate_lock);
1042 (void)spllo();
1043
1044 workq_destroy((struct workqueue *)qe);
1045
1046 (void)splsched();
1047 simple_lock(&thread_terminate_lock);
1048 }
1049
1050 /*
1051          * Check whether anything was enqueued on the thread terminate/deallocate
1052          * queues while we were processing the workq deallocate queue.
1053 */
1054 if (!queue_empty(&thread_terminate_queue) ||
1055 !queue_empty(&thread_deallocate_queue) ||
1056 !queue_empty(&turnstile_deallocate_queue))
1057 goto thread_terminate_start;
1058
1059 assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
1060 simple_unlock(&thread_terminate_lock);
1061 /* splsched */
1062
1063 self->options &= ~TH_OPT_SYSTEM_CRITICAL;
1064 thread_block((thread_continue_t)thread_terminate_daemon);
1065 /*NOTREACHED*/
1066 }
1067
1068 /*
1069 * thread_terminate_enqueue:
1070 *
1071 * Enqueue a terminating thread for final disposition.
1072 *
1073 * Called at splsched.
1074 */
1075 void
1076 thread_terminate_enqueue(
1077 thread_t thread)
1078 {
1079 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
1080
1081 simple_lock(&thread_terminate_lock);
1082 enqueue_tail(&thread_terminate_queue, &thread->runq_links);
1083 simple_unlock(&thread_terminate_lock);
1084
1085 thread_wakeup((event_t)&thread_terminate_queue);
1086 }
1087
1088 /*
1089 * thread_deallocate_enqueue:
1090 *
1091 * Enqueue a thread for final deallocation.
1092 */
1093 static void
1094 thread_deallocate_enqueue(
1095 thread_t thread)
1096 {
1097 spl_t s = splsched();
1098
1099 simple_lock(&thread_terminate_lock);
1100 enqueue_tail(&thread_deallocate_queue, &thread->runq_links);
1101 simple_unlock(&thread_terminate_lock);
1102
1103 thread_wakeup((event_t)&thread_terminate_queue);
1104 splx(s);
1105 }
1106
1107 /*
1108 * turnstile_deallocate_enqueue:
1109 *
1110 * Enqueue a turnstile for final deallocation.
1111 */
1112 void
1113 turnstile_deallocate_enqueue(
1114 struct turnstile *turnstile)
1115 {
1116 spl_t s = splsched();
1117
1118 simple_lock(&thread_terminate_lock);
1119 enqueue_tail(&turnstile_deallocate_queue, &turnstile->ts_deallocate_link);
1120 simple_unlock(&thread_terminate_lock);
1121
1122 thread_wakeup((event_t)&thread_terminate_queue);
1123 splx(s);
1124 }
1125
1126 /*
1127 * workq_deallocate_enqueue:
1128 *
1129 * Enqueue a workqueue for final deallocation.
1130 */
1131 void
1132 workq_deallocate_enqueue(
1133 struct workqueue *wq)
1134 {
1135 spl_t s = splsched();
1136
1137 simple_lock(&thread_terminate_lock);
1138 /*
1139          * This is just to delay a zfree(), so we link the memory without regard
1140          * for the structure's layout.
1141 */
1142 enqueue_tail(&workq_deallocate_queue, (queue_entry_t)wq);
1143 simple_unlock(&thread_terminate_lock);
1144
1145 thread_wakeup((event_t)&thread_terminate_queue);
1146 splx(s);
1147 }
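/*
 * Note on the queues above: the thread terminate, thread deallocate,
 * turnstile deallocate and workq deallocate queues are all protected by
 * thread_terminate_lock and all wake the same event
 * (&thread_terminate_queue). thread_terminate_daemon() therefore re-checks
 * the terminate, deallocate and turnstile queues (the
 * "goto thread_terminate_start" above) before blocking, so a wakeup posted
 * while it was draining one queue is not lost.
 */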
1148
1149 /*
1150 * thread_terminate_crashed_threads:
1151  * Walk the list of crashed threads and move back any threads
1152  * that are no longer being inspected.
1153 */
1154 void
1155 thread_terminate_crashed_threads()
1156 {
1157 thread_t th_remove;
1158 boolean_t should_wake_terminate_queue = FALSE;
1159 spl_t s = splsched();
1160
1161 simple_lock(&thread_terminate_lock);
1162 /*
1163          * Loop through the crashed threads queue and requeue
1164          * any threads that are no longer being inspected.
1165 */
1166
1167 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
1168 /* make sure current_thread is never in crashed queue */
1169 assert(th_remove != current_thread());
1170
1171 if (th_remove->inspection == FALSE) {
1172 re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
1173 should_wake_terminate_queue = TRUE;
1174 }
1175 }
1176
1177 simple_unlock(&thread_terminate_lock);
1178 splx(s);
1179 if (should_wake_terminate_queue == TRUE) {
1180 thread_wakeup((event_t)&thread_terminate_queue);
1181 }
1182 }
1183
1184 /*
1185 * thread_stack_daemon:
1186 *
1187 * Perform stack allocation as required due to
1188 * invoke failures.
1189 */
1190 static void
1191 thread_stack_daemon(void)
1192 {
1193 thread_t thread;
1194 spl_t s;
1195
1196 s = splsched();
1197 simple_lock(&thread_stack_lock);
1198
1199 while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
1200 assert_thread_magic(thread);
1201
1202 simple_unlock(&thread_stack_lock);
1203 splx(s);
1204
1205 /* allocate stack with interrupts enabled so that we can call into VM */
1206 stack_alloc(thread);
1207
1208 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1209
1210 s = splsched();
1211 thread_lock(thread);
1212 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1213 thread_unlock(thread);
1214
1215 simple_lock(&thread_stack_lock);
1216 }
1217
1218 assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
1219 simple_unlock(&thread_stack_lock);
1220 splx(s);
1221
1222 thread_block((thread_continue_t)thread_stack_daemon);
1223 /*NOTREACHED*/
1224 }
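/*
 * Note on the daemon structure: thread_stack_daemon(), like the terminate and
 * exception daemons, is written in continuation style -- it calls
 * assert_wait() on its queue and then thread_block() with itself as the
 * continuation, so it restarts from the top on each wakeup instead of keeping
 * a kernel stack parked in the middle of the loop.
 */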
1225
1226 /*
1227 * thread_stack_enqueue:
1228 *
1229 * Enqueue a thread for stack allocation.
1230 *
1231 * Called at splsched.
1232 */
1233 void
1234 thread_stack_enqueue(
1235 thread_t thread)
1236 {
1237 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1238 assert_thread_magic(thread);
1239
1240 simple_lock(&thread_stack_lock);
1241 enqueue_tail(&thread_stack_queue, &thread->runq_links);
1242 simple_unlock(&thread_stack_lock);
1243
1244 thread_wakeup((event_t)&thread_stack_queue);
1245 }
1246
1247 void
1248 thread_daemon_init(void)
1249 {
1250 kern_return_t result;
1251 thread_t thread = NULL;
1252
1253 simple_lock_init(&thread_terminate_lock, 0);
1254 queue_init(&thread_terminate_queue);
1255 queue_init(&thread_deallocate_queue);
1256 queue_init(&workq_deallocate_queue);
1257 queue_init(&turnstile_deallocate_queue);
1258 queue_init(&crashed_threads_queue);
1259
1260 result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
1261 if (result != KERN_SUCCESS)
1262 panic("thread_daemon_init: thread_terminate_daemon");
1263
1264 thread_deallocate(thread);
1265
1266 simple_lock_init(&thread_stack_lock, 0);
1267 queue_init(&thread_stack_queue);
1268
1269 result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT_HIGH, &thread);
1270 if (result != KERN_SUCCESS)
1271 panic("thread_daemon_init: thread_stack_daemon");
1272
1273 thread_deallocate(thread);
1274
1275 simple_lock_init(&thread_exception_lock, 0);
1276 queue_init(&thread_exception_queue);
1277
1278 result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
1279 if (result != KERN_SUCCESS)
1280 panic("thread_daemon_init: thread_exception_daemon");
1281
1282 thread_deallocate(thread);
1283 }
1284
1285 #define TH_OPTION_NONE 0x00
1286 #define TH_OPTION_NOCRED 0x01
1287 #define TH_OPTION_NOSUSP 0x02
1288 #define TH_OPTION_WORKQ 0x04
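/*
 * Example combination (see thread_create_workq_waiting() below): workqueue
 * threads are created with TH_OPTION_NOCRED | TH_OPTION_NOSUSP |
 * TH_OPTION_WORKQ: NOCRED is passed through to uthread_alloc(), NOSUSP makes
 * creation fail while the parent task is suspended, and WORKQ routes the new
 * thread through workq_thread_init_and_wq_lock() before it parks.
 */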
1289
1290 /*
1291 * Create a new thread.
1292 * Doesn't start the thread running.
1293 *
1294 * Task and tasks_threads_lock are returned locked on success.
1295 */
1296 static kern_return_t
1297 thread_create_internal(
1298 task_t parent_task,
1299 integer_t priority,
1300 thread_continue_t continuation,
1301 void *parameter,
1302 int options,
1303 thread_t *out_thread)
1304 {
1305 thread_t new_thread;
1306 static thread_t first_thread;
1307
1308 /*
1309 * Allocate a thread and initialize static fields
1310 */
1311 if (first_thread == THREAD_NULL)
1312 new_thread = first_thread = current_thread();
1313 else
1314 new_thread = (thread_t)zalloc(thread_zone);
1315 if (new_thread == THREAD_NULL)
1316 return (KERN_RESOURCE_SHORTAGE);
1317
1318 if (new_thread != first_thread)
1319 *new_thread = thread_template;
1320
1321 os_ref_init_count(&new_thread->ref_count, &thread_refgrp, 2);
1322
1323 #ifdef MACH_BSD
1324 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
1325 if (new_thread->uthread == NULL) {
1326 #if MACH_ASSERT
1327 new_thread->thread_magic = 0;
1328 #endif /* MACH_ASSERT */
1329
1330 zfree(thread_zone, new_thread);
1331 return (KERN_RESOURCE_SHORTAGE);
1332 }
1333 #endif /* MACH_BSD */
1334
1335 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
1336 #ifdef MACH_BSD
1337 void *ut = new_thread->uthread;
1338
1339 new_thread->uthread = NULL;
1340 /* cred free may not be necessary */
1341 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1342 uthread_cred_free(ut);
1343 uthread_zone_free(ut);
1344 #endif /* MACH_BSD */
1345
1346 #if MACH_ASSERT
1347 new_thread->thread_magic = 0;
1348 #endif /* MACH_ASSERT */
1349
1350 zfree(thread_zone, new_thread);
1351 return (KERN_FAILURE);
1352 }
1353
1354 new_thread->task = parent_task;
1355
1356 thread_lock_init(new_thread);
1357 wake_lock_init(new_thread);
1358
1359 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);
1360
1361 ipc_thread_init(new_thread);
1362
1363 new_thread->continuation = continuation;
1364 new_thread->parameter = parameter;
1365 new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
1366 priority_queue_init(&new_thread->inheritor_queue,
1367 PRIORITY_QUEUE_BUILTIN_MAX_HEAP);
1368
1369 /* Allocate I/O Statistics structure */
1370 new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1371 assert(new_thread->thread_io_stats != NULL);
1372 bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1373 new_thread->sync_ipc_overrides = 0;
1374
1375 #if KASAN
1376 kasan_init_thread(&new_thread->kasan_data);
1377 #endif
1378
1379 #if CONFIG_IOSCHED
1380 /* Clear out the I/O Scheduling info for AppleFSCompression */
1381 new_thread->decmp_upl = NULL;
1382 #endif /* CONFIG_IOSCHED */
1383
1384 #if DEVELOPMENT || DEBUG
1385 task_lock(parent_task);
1386 uint16_t thread_limit = parent_task->task_thread_limit;
1387 if (exc_resource_threads_enabled &&
1388 thread_limit > 0 &&
1389 parent_task->thread_count >= thread_limit &&
1390 !parent_task->task_has_crossed_thread_limit &&
1391 !(parent_task->t_flags & TF_CORPSE)) {
1392 int thread_count = parent_task->thread_count;
1393 parent_task->task_has_crossed_thread_limit = TRUE;
1394 task_unlock(parent_task);
1395 SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
1396 }
1397 else {
1398 task_unlock(parent_task);
1399 }
1400 #endif
1401
1402 lck_mtx_lock(&tasks_threads_lock);
1403 task_lock(parent_task);
1404
1405 /*
1406 * Fail thread creation if parent task is being torn down or has too many threads
1407 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
1408 */
1409 if (parent_task->active == 0 || parent_task->halting ||
1410 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1411 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1412 task_unlock(parent_task);
1413 lck_mtx_unlock(&tasks_threads_lock);
1414
1415 #ifdef MACH_BSD
1416 {
1417 void *ut = new_thread->uthread;
1418
1419 new_thread->uthread = NULL;
1420 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1421 /* cred free may not be necessary */
1422 uthread_cred_free(ut);
1423 uthread_zone_free(ut);
1424 }
1425 #endif /* MACH_BSD */
1426 ipc_thread_disable(new_thread);
1427 ipc_thread_terminate(new_thread);
1428 kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1429 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
1430 machine_thread_destroy(new_thread);
1431 zfree(thread_zone, new_thread);
1432 return (KERN_FAILURE);
1433 }
1434
1435 /* New threads inherit any default state on the task */
1436 machine_thread_inherit_taskwide(new_thread, parent_task);
1437
1438 task_reference_internal(parent_task);
1439
1440 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1441 /*
1442 * This task has a per-thread CPU limit; make sure this new thread
1443 * gets its limit set too, before it gets out of the kernel.
1444 */
1445 act_set_astledger(new_thread);
1446 }
1447
1448 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1449 if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
1450 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1451
1452 ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
1453 }
1454
1455 new_thread->t_bankledger = LEDGER_NULL;
1456 new_thread->t_deduct_bank_ledger_time = 0;
1457 new_thread->t_deduct_bank_ledger_energy = 0;
1458
1459 new_thread->t_ledger = new_thread->task->ledger;
1460 if (new_thread->t_ledger)
1461 ledger_reference(new_thread->t_ledger);
1462
1463 #if defined(CONFIG_SCHED_MULTIQ)
1464 /* Cache the task's sched_group */
1465 new_thread->sched_group = parent_task->sched_group;
1466 #endif /* defined(CONFIG_SCHED_MULTIQ) */
1467
1468 /* Cache the task's map */
1469 new_thread->map = parent_task->map;
1470
1471 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
1472 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
1473
1474 #if KPC
1475 kpc_thread_create(new_thread);
1476 #endif
1477
1478 /* Set the thread's scheduling parameters */
1479 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1480 new_thread->max_priority = parent_task->max_priority;
1481 new_thread->task_priority = parent_task->priority;
1482
1483         int new_priority = (priority < 0) ? parent_task->priority : priority;
1485 if (new_priority > new_thread->max_priority)
1486 new_priority = new_thread->max_priority;
1487 #if CONFIG_EMBEDDED
1488 if (new_priority < MAXPRI_THROTTLE) {
1489 new_priority = MAXPRI_THROTTLE;
1490 }
1491 #endif /* CONFIG_EMBEDDED */
1492
1493 new_thread->importance = new_priority - new_thread->task_priority;
1494
1495 sched_set_thread_base_priority(new_thread, new_priority);
1496
1497 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1498 new_thread->sched_stamp = sched_tick;
1499 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1500 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1501
1502 #if CONFIG_EMBEDDED
1503 if (parent_task->max_priority <= MAXPRI_THROTTLE)
1504 sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
1505 #endif /* CONFIG_EMBEDDED */
1506
1507 thread_policy_create(new_thread);
1508
1509 /* Chain the thread onto the task's list */
1510 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1511 parent_task->thread_count++;
1512
1513 /* So terminating threads don't need to take the task lock to decrement */
1514 hw_atomic_add(&parent_task->active_thread_count, 1);
1515
1516 /* Protected by the tasks_threads_lock */
1517 new_thread->thread_id = ++thread_unique_id;
1518
1519
1520 queue_enter(&threads, new_thread, thread_t, threads);
1521 threads_count++;
1522
1523 new_thread->active = TRUE;
1524 if (task_is_a_corpse_fork(parent_task)) {
1525 /* Set the inspection bit if the task is a corpse fork */
1526 new_thread->inspection = TRUE;
1527 } else {
1528 new_thread->inspection = FALSE;
1529 }
1530 new_thread->corpse_dup = FALSE;
1531 new_thread->turnstile = turnstile_alloc();
1532 *out_thread = new_thread;
1533
1534 if (kdebug_enable) {
1535 long args[4] = {};
1536
1537 kdbg_trace_data(parent_task->bsd_info, &args[1], &args[3]);
1538
1539 /*
1540 * Starting with 26604425, exec'ing creates a new task/thread.
1541 *
1542 * NEWTHREAD in the current process has two possible meanings:
1543 *
1544 * 1) Create a new thread for this process.
1545 * 2) Create a new thread for the future process this will become in an
1546 * exec.
1547 *
1548 * To disambiguate these, arg3 will be set to TRUE for case #2.
1549 *
1550 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1551 * task exec'ing. The read of t_procflags does not take the proc_lock.
1552 */
1553 args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
1554
1555 KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
1556 args[1], args[2], args[3]);
1557
1558 kdbg_trace_string(parent_task->bsd_info, &args[0], &args[1],
1559 &args[2], &args[3]);
1560 KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
1561 args[3]);
1562 }
1563
1564 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1565
1566 return (KERN_SUCCESS);
1567 }
1568
1569 static kern_return_t
1570 thread_create_internal2(
1571 task_t task,
1572 thread_t *new_thread,
1573 boolean_t from_user,
1574 thread_continue_t continuation)
1575 {
1576 kern_return_t result;
1577 thread_t thread;
1578
1579 if (task == TASK_NULL || task == kernel_task)
1580 return (KERN_INVALID_ARGUMENT);
1581
1582 result = thread_create_internal(task, -1, continuation, NULL, TH_OPTION_NONE, &thread);
1583 if (result != KERN_SUCCESS)
1584 return (result);
1585
1586 thread->user_stop_count = 1;
1587 thread_hold(thread);
1588 if (task->suspend_count > 0)
1589 thread_hold(thread);
1590
1591 if (from_user)
1592 extmod_statistics_incr_thread_create(task);
1593
1594 task_unlock(task);
1595 lck_mtx_unlock(&tasks_threads_lock);
1596
1597 *new_thread = thread;
1598
1599 return (KERN_SUCCESS);
1600 }
1601
1602 /* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
1603 kern_return_t
1604 thread_create(
1605 task_t task,
1606 thread_t *new_thread);
1607
1608 kern_return_t
1609 thread_create(
1610 task_t task,
1611 thread_t *new_thread)
1612 {
1613 return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
1614 }
1615
1616 kern_return_t
1617 thread_create_from_user(
1618 task_t task,
1619 thread_t *new_thread)
1620 {
1621 return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
1622 }
1623
1624 kern_return_t
1625 thread_create_with_continuation(
1626 task_t task,
1627 thread_t *new_thread,
1628 thread_continue_t continuation)
1629 {
1630 return thread_create_internal2(task, new_thread, FALSE, continuation);
1631 }
1632
1633 /*
1634 * Create a thread that is already started, but is waiting on an event
1635 */
1636 static kern_return_t
1637 thread_create_waiting_internal(
1638 task_t task,
1639 thread_continue_t continuation,
1640 event_t event,
1641 block_hint_t block_hint,
1642 int options,
1643 thread_t *new_thread)
1644 {
1645 kern_return_t result;
1646 thread_t thread;
1647
1648 if (task == TASK_NULL || task == kernel_task)
1649 return (KERN_INVALID_ARGUMENT);
1650
1651 result = thread_create_internal(task, -1, continuation, NULL,
1652 options, &thread);
1653 if (result != KERN_SUCCESS)
1654 return (result);
1655
1656 /* note no user_stop_count or thread_hold here */
1657
1658 if (task->suspend_count > 0)
1659 thread_hold(thread);
1660
1661 thread_mtx_lock(thread);
1662 thread_set_pending_block_hint(thread, block_hint);
1663 if (options & TH_OPTION_WORKQ) {
1664 thread->static_param = true;
1665 event = workq_thread_init_and_wq_lock(task, thread);
1666 }
1667 thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
1668 thread_mtx_unlock(thread);
1669
1670 task_unlock(task);
1671 lck_mtx_unlock(&tasks_threads_lock);
1672
1673 *new_thread = thread;
1674
1675 return (KERN_SUCCESS);
1676 }
1677
1678 kern_return_t
1679 thread_create_waiting(
1680 task_t task,
1681 thread_continue_t continuation,
1682 event_t event,
1683 thread_t *new_thread)
1684 {
1685 return thread_create_waiting_internal(task, continuation, event,
1686 kThreadWaitNone, TH_OPTION_NONE, new_thread);
1687 }
1688
1689
1690 static kern_return_t
1691 thread_create_running_internal2(
1692 task_t task,
1693 int flavor,
1694 thread_state_t new_state,
1695 mach_msg_type_number_t new_state_count,
1696 thread_t *new_thread,
1697 boolean_t from_user)
1698 {
1699 kern_return_t result;
1700 thread_t thread;
1701
1702 if (task == TASK_NULL || task == kernel_task)
1703 return (KERN_INVALID_ARGUMENT);
1704
1705 result = thread_create_internal(task, -1,
1706 (thread_continue_t)thread_bootstrap_return, NULL,
1707 TH_OPTION_NONE, &thread);
1708 if (result != KERN_SUCCESS)
1709 return (result);
1710
1711 if (task->suspend_count > 0)
1712 thread_hold(thread);
1713
1714 if (from_user) {
1715 result = machine_thread_state_convert_from_user(thread, flavor,
1716 new_state, new_state_count);
1717 }
1718 if (result == KERN_SUCCESS) {
1719 result = machine_thread_set_state(thread, flavor, new_state,
1720 new_state_count);
1721 }
1722 if (result != KERN_SUCCESS) {
1723 task_unlock(task);
1724 lck_mtx_unlock(&tasks_threads_lock);
1725
1726 thread_terminate(thread);
1727 thread_deallocate(thread);
1728 return (result);
1729 }
1730
1731 thread_mtx_lock(thread);
1732 thread_start(thread);
1733 thread_mtx_unlock(thread);
1734
1735 if (from_user)
1736 extmod_statistics_incr_thread_create(task);
1737
1738 task_unlock(task);
1739 lck_mtx_unlock(&tasks_threads_lock);
1740
1741 *new_thread = thread;
1742
1743 return (result);
1744 }
1745
1746 /* Prototype, see justification above */
1747 kern_return_t
1748 thread_create_running(
1749 task_t task,
1750 int flavor,
1751 thread_state_t new_state,
1752 mach_msg_type_number_t new_state_count,
1753 thread_t *new_thread);
1754
1755 kern_return_t
1756 thread_create_running(
1757 task_t task,
1758 int flavor,
1759 thread_state_t new_state,
1760 mach_msg_type_number_t new_state_count,
1761 thread_t *new_thread)
1762 {
1763 return thread_create_running_internal2(
1764 task, flavor, new_state, new_state_count,
1765 new_thread, FALSE);
1766 }
1767
1768 kern_return_t
1769 thread_create_running_from_user(
1770 task_t task,
1771 int flavor,
1772 thread_state_t new_state,
1773 mach_msg_type_number_t new_state_count,
1774 thread_t *new_thread)
1775 {
1776 return thread_create_running_internal2(
1777 task, flavor, new_state, new_state_count,
1778 new_thread, TRUE);
1779 }
1780
1781 kern_return_t
1782 thread_create_workq_waiting(
1783 task_t task,
1784 thread_continue_t continuation,
1785 thread_t *new_thread)
1786 {
1787 int options = TH_OPTION_NOCRED | TH_OPTION_NOSUSP | TH_OPTION_WORKQ;
1788 return thread_create_waiting_internal(task, continuation, NULL,
1789 kThreadWaitParkedWorkQueue, options, new_thread);
1790 }
1791
1792 /*
1793 * kernel_thread_create:
1794 *
1795 * Create a thread in the kernel task
1796 * to execute in kernel context.
1797 */
1798 kern_return_t
1799 kernel_thread_create(
1800 thread_continue_t continuation,
1801 void *parameter,
1802 integer_t priority,
1803 thread_t *new_thread)
1804 {
1805 kern_return_t result;
1806 thread_t thread;
1807 task_t task = kernel_task;
1808
1809 result = thread_create_internal(task, priority, continuation, parameter,
1810 TH_OPTION_NOCRED | TH_OPTION_NONE, &thread);
1811 if (result != KERN_SUCCESS)
1812 return (result);
1813
1814 task_unlock(task);
1815 lck_mtx_unlock(&tasks_threads_lock);
1816
1817 stack_alloc(thread);
1818 assert(thread->kernel_stack != 0);
1819 #if CONFIG_EMBEDDED
1820 if (priority > BASEPRI_KERNEL)
1821 #endif
1822 thread->reserved_stack = thread->kernel_stack;
1823
1824 if(debug_task & 1)
1825 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1826 *new_thread = thread;
1827
1828 return (result);
1829 }
1830
1831 kern_return_t
1832 kernel_thread_start_priority(
1833 thread_continue_t continuation,
1834 void *parameter,
1835 integer_t priority,
1836 thread_t *new_thread)
1837 {
1838 kern_return_t result;
1839 thread_t thread;
1840
1841 result = kernel_thread_create(continuation, parameter, priority, &thread);
1842 if (result != KERN_SUCCESS)
1843 return (result);
1844
1845 *new_thread = thread;
1846
1847 thread_mtx_lock(thread);
1848 thread_start(thread);
1849 thread_mtx_unlock(thread);
1850
1851 return (result);
1852 }
1853
1854 kern_return_t
1855 kernel_thread_start(
1856 thread_continue_t continuation,
1857 void *parameter,
1858 thread_t *new_thread)
1859 {
1860 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1861 }
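/*
 * Usage sketch (illustrative only; "my_daemon" is a hypothetical continuation,
 * not part of this file). Callers receive a reference on the new thread and
 * typically drop it right away, as thread_daemon_init() above does:
 *
 *	static void my_daemon(void *param, wait_result_t wr);
 *
 *	thread_t thread;
 *	if (kernel_thread_start((thread_continue_t)my_daemon, NULL,
 *	    &thread) == KERN_SUCCESS) {
 *		thread_deallocate(thread);
 *	}
 */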
1862
1863 /* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
1864 /* it is assumed that the thread is locked by the caller */
1865 static void
1866 retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
1867 {
1868 int state, flags;
1869
1870 /* fill in info */
1871
1872 thread_read_times(thread, &basic_info->user_time,
1873 &basic_info->system_time, NULL);
1874
1875 /*
1876 * Update lazy-evaluated scheduler info because someone wants it.
1877 */
1878 if (SCHED(can_update_priority)(thread))
1879 SCHED(update_priority)(thread);
1880
1881 basic_info->sleep_time = 0;
1882
1883 /*
1884 * To calculate cpu_usage, first correct for timer rate,
1885 * then for 5/8 ageing. The correction factor [3/5] is
1886 * (1/(5/8) - 1).
1887 */
1888 basic_info->cpu_usage = 0;
1889 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1890 if (sched_tick_interval) {
1891 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1892 * TH_USAGE_SCALE) / sched_tick_interval);
1893 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1894 }
1895 #endif
1896
1897 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1898 basic_info->cpu_usage = TH_USAGE_SCALE;
1899
1900 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1901 POLICY_TIMESHARE: POLICY_RR);
1902
1903 flags = 0;
1904 if (thread->options & TH_OPT_IDLE_THREAD)
1905 flags |= TH_FLAGS_IDLE;
1906
1907 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1908 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
1909 }
1910
1911 if (!thread->kernel_stack)
1912 flags |= TH_FLAGS_SWAPPED;
1913
1914 state = 0;
1915 if (thread->state & TH_TERMINATE)
1916 state = TH_STATE_HALTED;
1917 else
1918 if (thread->state & TH_RUN)
1919 state = TH_STATE_RUNNING;
1920 else
1921 if (thread->state & TH_UNINT)
1922 state = TH_STATE_UNINTERRUPTIBLE;
1923 else
1924 if (thread->state & TH_SUSP)
1925 state = TH_STATE_STOPPED;
1926 else
1927 if (thread->state & TH_WAIT)
1928 state = TH_STATE_WAITING;
1929
1930 basic_info->run_state = state;
1931 basic_info->flags = flags;
1932
1933 basic_info->suspend_count = thread->user_stop_count;
1934
1935 return;
1936 }
1937
1938 kern_return_t
1939 thread_info_internal(
1940 thread_t thread,
1941 thread_flavor_t flavor,
1942 thread_info_t thread_info_out, /* ptr to OUT array */
1943 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1944 {
1945 spl_t s;
1946
1947 if (thread == THREAD_NULL)
1948 return (KERN_INVALID_ARGUMENT);
1949
1950 if (flavor == THREAD_BASIC_INFO) {
1951
1952 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1953 return (KERN_INVALID_ARGUMENT);
1954
1955 s = splsched();
1956 thread_lock(thread);
1957
1958 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
1959
1960 thread_unlock(thread);
1961 splx(s);
1962
1963 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1964
1965 return (KERN_SUCCESS);
1966 }
1967 else
1968 if (flavor == THREAD_IDENTIFIER_INFO) {
1969 thread_identifier_info_t identifier_info;
1970
1971 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
1972 return (KERN_INVALID_ARGUMENT);
1973
1974 identifier_info = (thread_identifier_info_t) thread_info_out;
1975
1976 s = splsched();
1977 thread_lock(thread);
1978
1979 identifier_info->thread_id = thread->thread_id;
1980 identifier_info->thread_handle = thread->machine.cthread_self;
1981 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
1982
1983 thread_unlock(thread);
1984 splx(s);
1985 return KERN_SUCCESS;
1986 }
1987 else
1988 if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
1989 policy_timeshare_info_t ts_info;
1990
1991 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
1992 return (KERN_INVALID_ARGUMENT);
1993
1994 ts_info = (policy_timeshare_info_t)thread_info_out;
1995
1996 s = splsched();
1997 thread_lock(thread);
1998
1999 if (thread->sched_mode != TH_MODE_TIMESHARE) {
2000 thread_unlock(thread);
2001 splx(s);
2002 return (KERN_INVALID_POLICY);
2003 }
2004
2005 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2006 if (ts_info->depressed) {
2007 ts_info->base_priority = DEPRESSPRI;
2008 ts_info->depress_priority = thread->base_pri;
2009 }
2010 else {
2011 ts_info->base_priority = thread->base_pri;
2012 ts_info->depress_priority = -1;
2013 }
2014
2015 ts_info->cur_priority = thread->sched_pri;
2016 ts_info->max_priority = thread->max_priority;
2017
2018 thread_unlock(thread);
2019 splx(s);
2020
2021 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
2022
2023 return (KERN_SUCCESS);
2024 }
2025 else
2026 if (flavor == THREAD_SCHED_FIFO_INFO) {
2027 if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
2028 return (KERN_INVALID_ARGUMENT);
2029
2030 return (KERN_INVALID_POLICY);
2031 }
2032 else
2033 if (flavor == THREAD_SCHED_RR_INFO) {
2034 policy_rr_info_t rr_info;
2035 uint32_t quantum_time;
2036 uint64_t quantum_ns;
2037
2038 if (*thread_info_count < POLICY_RR_INFO_COUNT)
2039 return (KERN_INVALID_ARGUMENT);
2040
2041 rr_info = (policy_rr_info_t) thread_info_out;
2042
2043 s = splsched();
2044 thread_lock(thread);
2045
2046 if (thread->sched_mode == TH_MODE_TIMESHARE) {
2047 thread_unlock(thread);
2048 splx(s);
2049
2050 return (KERN_INVALID_POLICY);
2051 }
2052
2053 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2054 if (rr_info->depressed) {
2055 rr_info->base_priority = DEPRESSPRI;
2056 rr_info->depress_priority = thread->base_pri;
2057 }
2058 else {
2059 rr_info->base_priority = thread->base_pri;
2060 rr_info->depress_priority = -1;
2061 }
2062
2063 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2064 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2065
2066 rr_info->max_priority = thread->max_priority;
2067 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2068
2069 thread_unlock(thread);
2070 splx(s);
2071
2072 *thread_info_count = POLICY_RR_INFO_COUNT;
2073
2074 return (KERN_SUCCESS);
2075 }
2076 else
2077 if (flavor == THREAD_EXTENDED_INFO) {
2078 thread_basic_info_data_t basic_info;
2079 thread_extended_info_t extended_info = (thread_extended_info_t) thread_info_out;
2080
2081 if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
2082 return (KERN_INVALID_ARGUMENT);
2083 }
2084
2085 s = splsched();
2086 thread_lock(thread);
2087
2088 /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
2089 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
2090 */
2091 retrieve_thread_basic_info(thread, &basic_info);
2092 extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
2093 extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));
2094
2095 extended_info->pth_cpu_usage = basic_info.cpu_usage;
2096 extended_info->pth_policy = basic_info.policy;
2097 extended_info->pth_run_state = basic_info.run_state;
2098 extended_info->pth_flags = basic_info.flags;
2099 extended_info->pth_sleep_time = basic_info.sleep_time;
2100 extended_info->pth_curpri = thread->sched_pri;
2101 extended_info->pth_priority = thread->base_pri;
2102 extended_info->pth_maxpriority = thread->max_priority;
2103
2104 bsd_getthreadname(thread->uthread, extended_info->pth_name);
2105
2106 thread_unlock(thread);
2107 splx(s);
2108
2109 *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
2110
2111 return (KERN_SUCCESS);
2112 }
2113 else
2114 if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
2115 #if DEVELOPMENT || DEBUG
2116 thread_debug_info_internal_t dbg_info;
2117 if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
2118 return (KERN_NOT_SUPPORTED);
2119
2120 if (thread_info_out == NULL)
2121 return (KERN_INVALID_ARGUMENT);
2122
2123 dbg_info = (thread_debug_info_internal_t) thread_info_out;
2124 dbg_info->page_creation_count = thread->t_page_creation_count;
2125
2126 *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
2127 return (KERN_SUCCESS);
2128 #endif /* DEVELOPMENT || DEBUG */
2129 return (KERN_NOT_SUPPORTED);
2130 }
2131
2132 return (KERN_INVALID_ARGUMENT);
2133 }
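
/*
 * Editorial note -- illustrative user-space caller of the MIG routine that
 * lands here (sketch only; not part of the original file):
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *	kern_return_t kr = thread_info(mach_thread_self(), THREAD_BASIC_INFO,
 *	    (thread_info_t)&info, &count);
 */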
2134
2135 void
2136 thread_read_times(
2137 thread_t thread,
2138 time_value_t *user_time,
2139 time_value_t *system_time,
2140 time_value_t *runnable_time)
2141 {
2142 clock_sec_t secs;
2143 clock_usec_t usecs;
2144 uint64_t tval_user, tval_system;
2145
2146 tval_user = timer_grab(&thread->user_timer);
2147 tval_system = timer_grab(&thread->system_timer);
2148
2149 if (thread->precise_user_kernel_time) {
2150 absolutetime_to_microtime(tval_user, &secs, &usecs);
2151 user_time->seconds = (typeof(user_time->seconds))secs;
2152 user_time->microseconds = usecs;
2153
2154 absolutetime_to_microtime(tval_system, &secs, &usecs);
2155 system_time->seconds = (typeof(system_time->seconds))secs;
2156 system_time->microseconds = usecs;
2157 } else {
2158 /* system_timer may represent either sys or user */
2159 tval_user += tval_system;
2160 absolutetime_to_microtime(tval_user, &secs, &usecs);
2161 user_time->seconds = (typeof(user_time->seconds))secs;
2162 user_time->microseconds = usecs;
2163
2164 system_time->seconds = 0;
2165 system_time->microseconds = 0;
2166 }
2167
2168 if (runnable_time) {
2169 uint64_t tval_runnable = timer_grab(&thread->runnable_timer);
2170 absolutetime_to_microtime(tval_runnable, &secs, &usecs);
2171 runnable_time->seconds = (typeof(runnable_time->seconds))secs;
2172 runnable_time->microseconds = usecs;
2173 }
2174 }
2175
2176 uint64_t thread_get_runtime_self(void)
2177 {
2178 boolean_t interrupt_state;
2179 uint64_t runtime;
2180 thread_t thread = NULL;
2181 processor_t processor = NULL;
2182
2183 thread = current_thread();
2184
2185 /* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
2186 interrupt_state = ml_set_interrupts_enabled(FALSE);
2187 processor = current_processor();
2188 timer_update(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time());
2189 runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
2190 ml_set_interrupts_enabled(interrupt_state);
2191
2192 return runtime;
2193 }
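
/*
 * Editorial note -- illustrative use (sketch): the return value is the calling
 * thread's combined user+system CPU time in mach absolute time units, so a
 * caller can bracket a region with two calls and subtract:
 *
 *	uint64_t t0 = thread_get_runtime_self();
 *	... work ...
 *	uint64_t cpu_delta_abs = thread_get_runtime_self() - t0;
 */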
2194
2195 kern_return_t
2196 thread_assign(
2197 __unused thread_t thread,
2198 __unused processor_set_t new_pset)
2199 {
2200 return (KERN_FAILURE);
2201 }
2202
2203 /*
2204 * thread_assign_default:
2205 *
2206 * Special version of thread_assign for assigning threads to default
2207 * processor set.
2208 */
2209 kern_return_t
2210 thread_assign_default(
2211 thread_t thread)
2212 {
2213 return (thread_assign(thread, &pset0));
2214 }
2215
2216 /*
2217 * thread_get_assignment
2218 *
2219 * Return current assignment for this thread.
2220 */
2221 kern_return_t
2222 thread_get_assignment(
2223 thread_t thread,
2224 processor_set_t *pset)
2225 {
2226 if (thread == NULL)
2227 return (KERN_INVALID_ARGUMENT);
2228
2229 *pset = &pset0;
2230
2231 return (KERN_SUCCESS);
2232 }
2233
2234 /*
2235 * thread_wire_internal:
2236 *
2237 * Specify that the target thread must always be able
2238 * to run and to allocate memory.
2239 */
2240 kern_return_t
2241 thread_wire_internal(
2242 host_priv_t host_priv,
2243 thread_t thread,
2244 boolean_t wired,
2245 boolean_t *prev_state)
2246 {
2247 if (host_priv == NULL || thread != current_thread())
2248 return (KERN_INVALID_ARGUMENT);
2249
2250 assert(host_priv == &realhost);
2251
2252 if (prev_state)
2253 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2254
2255 if (wired) {
2256 if (!(thread->options & TH_OPT_VMPRIV))
2257 vm_page_free_reserve(1); /* XXX */
2258 thread->options |= TH_OPT_VMPRIV;
2259 }
2260 else {
2261 if (thread->options & TH_OPT_VMPRIV)
2262 vm_page_free_reserve(-1); /* XXX */
2263 thread->options &= ~TH_OPT_VMPRIV;
2264 }
2265
2266 return (KERN_SUCCESS);
2267 }
2268
2269
2270 /*
2271 * thread_wire:
2272 *
2273 * User-api wrapper for thread_wire_internal()
2274 */
2275 kern_return_t
2276 thread_wire(
2277 host_priv_t host_priv,
2278 thread_t thread,
2279 boolean_t wired)
2280 {
2281 return (thread_wire_internal(host_priv, thread, wired, NULL));
2282 }
2283
2284
2285 boolean_t
2286 is_vm_privileged(void)
2287 {
2288 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2289 }
2290
2291 boolean_t
2292 set_vm_privilege(boolean_t privileged)
2293 {
2294 boolean_t was_vmpriv;
2295
2296 if (current_thread()->options & TH_OPT_VMPRIV)
2297 was_vmpriv = TRUE;
2298 else
2299 was_vmpriv = FALSE;
2300
2301 if (privileged != FALSE)
2302 current_thread()->options |= TH_OPT_VMPRIV;
2303 else
2304 current_thread()->options &= ~TH_OPT_VMPRIV;
2305
2306 return (was_vmpriv);
2307 }
2308
2309 void
2310 set_thread_rwlock_boost(void)
2311 {
2312 current_thread()->rwlock_count++;
2313 }
2314
2315 void
2316 clear_thread_rwlock_boost(void)
2317 {
2318 thread_t thread = current_thread();
2319
2320 if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2321
2322 lck_rw_clear_promotion(thread, 0);
2323 }
2324 }
2325
2326
2327 /*
2328 * XXX assuming current thread only, for now...
2329 */
2330 void
2331 thread_guard_violation(thread_t thread,
2332 mach_exception_data_type_t code, mach_exception_data_type_t subcode)
2333 {
2334 assert(thread == current_thread());
2335
2336 /* don't set up the AST for kernel threads */
2337 if (thread->task == kernel_task)
2338 return;
2339
2340 spl_t s = splsched();
2341 /*
2342 * Use the saved state area of the thread structure
2343 * to store all info required to handle the AST when
2344 * returning to userspace
2345 */
2346 assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
2347 thread->guard_exc_info.code = code;
2348 thread->guard_exc_info.subcode = subcode;
2349 thread_ast_set(thread, AST_GUARD);
2350 ast_propagate(thread);
2351
2352 splx(s);
2353 }
2354
2355 /*
2356 * guard_ast:
2357 *
2358 * Handle AST_GUARD for a thread. This routine looks at the
2359 * state saved in the thread structure to determine the cause
2360 * of this exception. Based on this value, it invokes the
2361 * appropriate routine which determines other exception related
2362 * info and raises the exception.
2363 */
2364 void
2365 guard_ast(thread_t t)
2366 {
2367 const mach_exception_data_type_t
2368 code = t->guard_exc_info.code,
2369 subcode = t->guard_exc_info.subcode;
2370
2371 t->guard_exc_info.code = 0;
2372 t->guard_exc_info.subcode = 0;
2373
2374 switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
2375 case GUARD_TYPE_NONE:
2376 /* lingering AST_GUARD on the processor? */
2377 break;
2378 case GUARD_TYPE_MACH_PORT:
2379 mach_port_guard_ast(t, code, subcode);
2380 break;
2381 case GUARD_TYPE_FD:
2382 fd_guard_ast(t, code, subcode);
2383 break;
2384 #if CONFIG_VNGUARD
2385 case GUARD_TYPE_VN:
2386 vn_guard_ast(t, code, subcode);
2387 break;
2388 #endif
2389 case GUARD_TYPE_VIRT_MEMORY:
2390 virt_memory_guard_ast(t, code, subcode);
2391 break;
2392 default:
2393 panic("guard_exc_info %llx %llx", code, subcode);
2394 }
2395 }
2396
2397 static void
2398 thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
2399 {
2400 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
2401 #if CONFIG_TELEMETRY
2402 /*
2403 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
2404 * on the entire task so there are micro-stackshots available if and when
2405 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
2406 * for this thread only; but now that this task is suspect, knowing what all of
2407 * its threads are up to will be useful.
2408 */
2409 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
2410 #endif
2411 return;
2412 }
2413
2414 #if CONFIG_TELEMETRY
2415 /*
2416 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
2417 * exceeded the limit, turn telemetry off for the task.
2418 */
2419 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
2420 #endif
2421
2422 if (warning == 0) {
2423 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
2424 }
2425 }
2426
2427 void __attribute__((noinline))
2428 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
2429 {
2430 int pid = 0;
2431 task_t task = current_task();
2432 thread_t thread = current_thread();
2433 uint64_t tid = thread->thread_id;
2434 const char *procname = "unknown";
2435 time_value_t thread_total_time = {0, 0};
2436 time_value_t thread_system_time;
2437 time_value_t thread_user_time;
2438 int action;
2439 uint8_t percentage;
2440 uint32_t usage_percent = 0;
2441 uint32_t interval_sec;
2442 uint64_t interval_ns;
2443 uint64_t balance_ns;
2444 boolean_t fatal = FALSE;
2445 boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
2446 kern_return_t kr;
2447
2448 #ifdef EXC_RESOURCE_MONITORS
2449 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2450 #endif /* EXC_RESOURCE_MONITORS */
2451 struct ledger_entry_info lei;
2452
2453 assert(thread->t_threadledger != LEDGER_NULL);
2454
2455 /*
2456 * Extract the fatal bit and suspend the monitor (which clears the bit).
2457 */
2458 task_lock(task);
2459 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
2460 fatal = TRUE;
2461 send_exc_resource = TRUE;
2462 }
2463 /* Only one thread can be here at a time. Whichever makes it through
2464 first will successfully suspend the monitor and proceed to send the
2465 notification. Other threads will get an error trying to suspend the
2466 monitor and give up on sending the notification. In the first release,
2467 the monitor won't be resumed for a number of seconds, but we may
2468 eventually need to handle low-latency resume.
2469 */
2470 kr = task_suspend_cpumon(task);
2471 task_unlock(task);
2472 if (kr == KERN_INVALID_ARGUMENT) return;
2473
2474 #ifdef MACH_BSD
2475 pid = proc_selfpid();
2476 if (task->bsd_info != NULL) {
2477 procname = proc_name_address(task->bsd_info);
2478 }
2479 #endif
2480
2481 thread_get_cpulimit(&action, &percentage, &interval_ns);
2482
2483 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
2484
2485 thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
2486 time_value_add(&thread_total_time, &thread_user_time);
2487 time_value_add(&thread_total_time, &thread_system_time);
2488 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
2489
2490 /* credit/debit/balance/limit are in absolute time units;
2491 the refill info is in nanoseconds. */
2492 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
2493 if (lei.lei_last_refill > 0) {
2494 usage_percent = (uint32_t)((balance_ns*100ULL) / lei.lei_last_refill);
2495 }
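/* Editorial example: balance_ns == 500000000 with a 1 s lei_last_refill gives usage_percent == 50. */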
2496
2497 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2498 printf("process %s[%d] thread %llu caught burning CPU! "
2499 "It used more than %d%% CPU over %u seconds "
2500 "(actual recent usage: %d%% over ~%llu seconds). "
2501 "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
2502 "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
2503 "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
2504 procname, pid, tid,
2505 percentage, interval_sec,
2506 usage_percent,
2507 (lei.lei_last_refill + NSEC_PER_SEC/2) / NSEC_PER_SEC,
2508 thread_total_time.seconds, thread_total_time.microseconds,
2509 thread_user_time.seconds, thread_user_time.microseconds,
2510 thread_system_time.seconds, thread_system_time.microseconds,
2511 lei.lei_balance, lei.lei_credit, lei.lei_debit,
2512 lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
2513 (fatal ? " [fatal violation]" : ""));
2514
2515 /*
2516 For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2517 we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2518 */
2519
2520 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2521 lei.lei_balance = balance_ns;
2522 absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
2523 trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
2524 kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
2525 fatal ? kRNFatalLimitFlag : 0);
2526 if (kr) {
2527 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
2528 }
2529
2530 #ifdef EXC_RESOURCE_MONITORS
2531 if (send_exc_resource) {
2532 if (disable_exc_resource) {
2533 printf("process %s[%d] thread %llu caught burning CPU! "
2534 "EXC_RESOURCE%s supressed by a boot-arg\n",
2535 procname, pid, tid, fatal ? " (and termination)" : "");
2536 return;
2537 }
2538
2539 if (audio_active) {
2540 printf("process %s[%d] thread %llu caught burning CPU! "
2541 "EXC_RESOURCE & termination supressed due to audio playback\n",
2542 procname, pid, tid);
2543 return;
2544 }
2545 }
2546
2547
2548 if (send_exc_resource) {
2549 code[0] = code[1] = 0;
2550 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
2551 if (fatal) {
2552 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
2553 } else {
2554 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
2555 }
2556 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
2557 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
2558 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
2559 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
2560 }
2561 #endif /* EXC_RESOURCE_MONITORS */
2562
2563 if (fatal) {
2564 #if CONFIG_JETSAM
2565 jetsam_on_ledger_cpulimit_exceeded();
2566 #else
2567 task_terminate_internal(task);
2568 #endif
2569 }
2570 }
2571
2572 #if DEVELOPMENT || DEBUG
2573 void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
2574 {
2575 mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
2576 int pid = task_pid(task);
2577 char procname[MAXCOMLEN+1] = "unknown";
2578
2579 if (pid == 1) {
2580 /*
2581 * Cannot suspend launchd
2582 */
2583 return;
2584 }
2585
2586 proc_name(pid, procname, sizeof(procname));
2587
2588 if (disable_exc_resource) {
2589 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2590 "supressed by a boot-arg. \n", procname, pid, thread_count);
2591 return;
2592 }
2593
2594 if (audio_active) {
2595 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2596 "supressed due to audio playback.\n", procname, pid, thread_count);
2597 return;
2598 }
2599
2600 if (exc_via_corpse_forking == 0) {
2601 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2602 "supressed due to corpse forking being disabled.\n", procname, pid,
2603 thread_count);
2604 return;
2605 }
2606
2607 printf("process %s[%d] crossed thread count high watermark (%d), sending "
2608 "EXC_RESOURCE\n", procname, pid, thread_count);
2609
2610 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
2611 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
2612 EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);
2613
2614 task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL);
2615 }
2616 #endif /* DEVELOPMENT || DEBUG */
2617
2618 void thread_update_io_stats(thread_t thread, int size, int io_flags)
2619 {
2620 int io_tier;
2621
2622 if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
2623 return;
2624
2625 if (io_flags & DKIO_READ) {
2626 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2627 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
2628 }
2629
2630 if (io_flags & DKIO_META) {
2631 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2632 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
2633 }
2634
2635 if (io_flags & DKIO_PAGING) {
2636 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2637 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
2638 }
2639
2640 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
2641 assert (io_tier < IO_NUM_PRIORITIES);
2642
2643 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2644 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);
2645
2646 /* Update Total I/O Counts */
2647 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2648 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);
2649
2650 if (!(io_flags & DKIO_READ)) {
2651 DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
2652 ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
2653 }
2654 }
2655
2656 static void
2657 init_thread_ledgers(void) {
2658 ledger_template_t t;
2659 int idx;
2660
2661 assert(thread_ledger_template == NULL);
2662
2663 if ((t = ledger_template_create("Per-thread ledger")) == NULL)
2664 panic("couldn't create thread ledger template");
2665
2666 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2667 panic("couldn't create cpu_time entry for thread ledger template");
2668 }
2669
2670 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2671 panic("couldn't set thread ledger callback for cpu_time entry");
2672 }
2673
2674 thread_ledgers.cpu_time = idx;
2675
2676 ledger_template_complete(t);
2677 thread_ledger_template = t;
2678 }
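
/*
 * Editorial note: per-thread ledgers are not created here; thread_set_cpulimit()
 * below instantiates one from thread_ledger_template the first time a CPU limit
 * is applied to a thread.
 */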
2679
2680 /*
2681 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2682 */
2683 int
2684 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2685 {
2686 int64_t abstime = 0;
2687 uint64_t limittime = 0;
2688 thread_t thread = current_thread();
2689
2690 *percentage = 0;
2691 *interval_ns = 0;
2692 *action = 0;
2693
2694 if (thread->t_threadledger == LEDGER_NULL) {
2695 /*
2696 * This thread has no per-thread ledger, so it can't possibly
2697 * have a CPU limit applied.
2698 */
2699 return (KERN_SUCCESS);
2700 }
2701
2702 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2703 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2704
2705 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2706 /*
2707 * This thread's CPU time ledger has no period or limit; so it
2708 * doesn't have a CPU limit applied.
2709 */
2710 return (KERN_SUCCESS);
2711 }
2712
2713 /*
2714 * This calculation is the converse to the one in thread_set_cpulimit().
2715 */
2716 absolutetime_to_nanoseconds(abstime, &limittime);
2717 *percentage = (limittime * 100ULL) / *interval_ns;
2718 assert(*percentage <= 100);
2719
2720 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2721 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2722
2723 *action = THREAD_CPULIMIT_BLOCK;
2724 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2725 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2726
2727 *action = THREAD_CPULIMIT_EXCEPTION;
2728 } else {
2729 *action = THREAD_CPULIMIT_DISABLE;
2730 }
2731
2732 return (KERN_SUCCESS);
2733 }
2734
2735 /*
2736 * Set CPU usage limit on a thread.
2737 *
2738 * Calling with percentage of 0 will unset the limit for this thread.
2739 */
2740 int
2741 thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
2742 {
2743 thread_t thread = current_thread();
2744 ledger_t l;
2745 uint64_t limittime = 0;
2746 uint64_t abstime = 0;
2747
2748 assert(percentage <= 100);
2749
2750 if (action == THREAD_CPULIMIT_DISABLE) {
2751 /*
2752 * Remove CPU limit, if any exists.
2753 */
2754 if (thread->t_threadledger != LEDGER_NULL) {
2755 l = thread->t_threadledger;
2756 ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2757 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
2758 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
2759 }
2760
2761 return (0);
2762 }
2763
2764 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
2765 return (KERN_INVALID_ARGUMENT);
2766 }
2767
2768 l = thread->t_threadledger;
2769 if (l == LEDGER_NULL) {
2770 /*
2771 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
2772 */
2773 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
2774 return (KERN_RESOURCE_SHORTAGE);
2775
2776 /*
2777 * We are the first to create this thread's ledger, so only activate our entry.
2778 */
2779 ledger_entry_setactive(l, thread_ledgers.cpu_time);
2780 thread->t_threadledger = l;
2781 }
2782
2783 /*
2784 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
2785 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
2786 */
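/* Editorial example: percentage == 25 over interval_ns == 1 s gives limittime == 250 ms of CPU per refill period. */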
2787 limittime = (interval_ns * percentage) / 100;
2788 nanoseconds_to_absolutetime(limittime, &abstime);
2789 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
2790 /*
2791 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
2792 */
2793 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
2794
2795 if (action == THREAD_CPULIMIT_EXCEPTION) {
2796 /*
2797 * We don't support programming the CPU usage monitor on a task if any of its
2798 * threads have a per-thread blocking CPU limit configured.
2799 */
2800 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2801 panic("CPU usage monitor activated, but blocking thread limit exists");
2802 }
2803
2804 /*
2805 * Make a note that this thread's CPU limit is being used for the task-wide CPU
2806 * usage monitor. We don't have to arm the callback which will trigger the
2807 * exception, because that was done for us in ledger_instantiate (because the
2808 * ledger template used has a default callback).
2809 */
2810 thread->options |= TH_OPT_PROC_CPULIMIT;
2811 } else {
2812 /*
2813 * We deliberately override any CPU limit imposed by a task-wide limit (e.g.
2814 * CPU usage monitor).
2815 */
2816 thread->options &= ~TH_OPT_PROC_CPULIMIT;
2817
2818 thread->options |= TH_OPT_PRVT_CPULIMIT;
2819 /* The per-thread ledger template by default has a callback for CPU time */
2820 ledger_disable_callback(l, thread_ledgers.cpu_time);
2821 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2822 }
2823
2824 return (0);
2825 }
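
/*
 * Editorial note -- illustrative call (sketch; the parameter values are made up):
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 10 * NSEC_PER_SEC);
 *
 * throttles the calling thread to roughly 50% of one CPU over a 10-second
 * refill period by letting the ledger block the thread when the limit is hit.
 */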
2826
2827 void
2828 thread_sched_call(
2829 thread_t thread,
2830 sched_call_t call)
2831 {
2832 assert((thread->state & TH_WAIT_REPORT) == 0);
2833 thread->sched_call = call;
2834 }
2835
2836 uint64_t
2837 thread_tid(
2838 thread_t thread)
2839 {
2840 return (thread != THREAD_NULL? thread->thread_id: 0);
2841 }
2842
2843 uint16_t
2844 thread_set_tag(thread_t th, uint16_t tag)
2845 {
2846 return thread_set_tag_internal(th, tag);
2847 }
2848
2849 uint16_t
2850 thread_get_tag(thread_t th)
2851 {
2852 return thread_get_tag_internal(th);
2853 }
2854
2855 uint64_t
2856 thread_last_run_time(thread_t th)
2857 {
2858 return th->last_run_time;
2859 }
2860
2861 uint64_t
2862 thread_dispatchqaddr(
2863 thread_t thread)
2864 {
2865 uint64_t dispatchqueue_addr;
2866 uint64_t thread_handle;
2867
2868 if (thread == THREAD_NULL)
2869 return 0;
2870
2871 thread_handle = thread->machine.cthread_self;
2872 if (thread_handle == 0)
2873 return 0;
2874
2875 if (thread->inspection == TRUE)
2876 dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
2877 else if (thread->task->bsd_info)
2878 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
2879 else
2880 dispatchqueue_addr = 0;
2881
2882 return dispatchqueue_addr;
2883 }
2884
2885 uint64_t
2886 thread_rettokern_addr(
2887 thread_t thread)
2888 {
2889 uint64_t rettokern_addr;
2890 uint64_t rettokern_offset;
2891 uint64_t thread_handle;
2892
2893 if (thread == THREAD_NULL)
2894 return 0;
2895
2896 thread_handle = thread->machine.cthread_self;
2897 if (thread_handle == 0)
2898 return 0;
2899
2900 if (thread->task->bsd_info) {
2901 rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);
2902
2903 /* Return 0 if return to kernel offset is not initialized. */
2904 if (rettokern_offset == 0) {
2905 rettokern_addr = 0;
2906 } else {
2907 rettokern_addr = thread_handle + rettokern_offset;
2908 }
2909 } else {
2910 rettokern_addr = 0;
2911 }
2912
2913 return rettokern_addr;
2914 }
2915
2916 /*
2917 * Export routines to other components for things that are done as macros
2918 * within the osfmk component.
2919 */
2920
2921 #undef thread_mtx_lock
2922 void thread_mtx_lock(thread_t thread);
2923 void
2924 thread_mtx_lock(thread_t thread)
2925 {
2926 lck_mtx_lock(&thread->mutex);
2927 }
2928
2929 #undef thread_mtx_unlock
2930 void thread_mtx_unlock(thread_t thread);
2931 void
2932 thread_mtx_unlock(thread_t thread)
2933 {
2934 lck_mtx_unlock(&thread->mutex);
2935 }
2936
2937 #undef thread_reference
2938 void thread_reference(thread_t thread);
2939 void
2940 thread_reference(
2941 thread_t thread)
2942 {
2943 if (thread != THREAD_NULL)
2944 thread_reference_internal(thread);
2945 }
2946
2947 #undef thread_should_halt
2948
2949 boolean_t
2950 thread_should_halt(
2951 thread_t th)
2952 {
2953 return (thread_should_halt_fast(th));
2954 }
2955
2956 /*
2957 * thread_set_voucher_name - reset the voucher port name bound to this thread
2958 *
2959 * Conditions: nothing locked
2960 *
2961 * If we already converted the previous name to a cached voucher
2962 * reference, then we discard that reference here. The next lookup
2963 * will cache it again.
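* (Editorial note: passing MACH_PORT_NULL leaves new_voucher == IPC_VOUCHER_NULL
* below, so it simply unbinds and releases any previously bound voucher.)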
2964 */
2965
2966 kern_return_t
2967 thread_set_voucher_name(mach_port_name_t voucher_name)
2968 {
2969 thread_t thread = current_thread();
2970 ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
2971 ipc_voucher_t voucher;
2972 ledger_t bankledger = NULL;
2973 struct thread_group *banktg = NULL;
2974
2975 if (MACH_PORT_DEAD == voucher_name)
2976 return KERN_INVALID_RIGHT;
2977
2978 /*
2979 * aggressively convert to voucher reference
2980 */
2981 if (MACH_PORT_VALID(voucher_name)) {
2982 new_voucher = convert_port_name_to_voucher(voucher_name);
2983 if (IPC_VOUCHER_NULL == new_voucher)
2984 return KERN_INVALID_ARGUMENT;
2985 }
2986 bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);
2987
2988 thread_mtx_lock(thread);
2989 voucher = thread->ith_voucher;
2990 thread->ith_voucher_name = voucher_name;
2991 thread->ith_voucher = new_voucher;
2992 thread_mtx_unlock(thread);
2993
2994 bank_swap_thread_bank_ledger(thread, bankledger);
2995
2996 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2997 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2998 (uintptr_t)thread_tid(thread),
2999 (uintptr_t)voucher_name,
3000 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
3001 1, 0);
3002
3003 if (IPC_VOUCHER_NULL != voucher)
3004 ipc_voucher_release(voucher);
3005
3006 return KERN_SUCCESS;
3007 }
3008
3009 /*
3010 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
3011 *
3012 * Conditions: nothing locked
3013 *
3014 * A reference to the voucher may be lazily pending, if someone set the voucher name
3015 * but nobody has done a lookup yet. In that case, we'll have to do the equivalent
3016 * lookup here.
3017 *
3018 * NOTE: At the moment, there is no distinction between the current and effective
3019 * vouchers because we only set them at the thread level currently.
3020 */
3021 kern_return_t
3022 thread_get_mach_voucher(
3023 thread_act_t thread,
3024 mach_voucher_selector_t __unused which,
3025 ipc_voucher_t *voucherp)
3026 {
3027 ipc_voucher_t voucher;
3028 mach_port_name_t voucher_name;
3029
3030 if (THREAD_NULL == thread)
3031 return KERN_INVALID_ARGUMENT;
3032
3033 thread_mtx_lock(thread);
3034 voucher = thread->ith_voucher;
3035
3036 /* if already cached, just return a ref */
3037 if (IPC_VOUCHER_NULL != voucher) {
3038 ipc_voucher_reference(voucher);
3039 thread_mtx_unlock(thread);
3040 *voucherp = voucher;
3041 return KERN_SUCCESS;
3042 }
3043
3044 voucher_name = thread->ith_voucher_name;
3045
3046 /* convert the name to a port, then voucher reference */
3047 if (MACH_PORT_VALID(voucher_name)) {
3048 ipc_port_t port;
3049
3050 if (KERN_SUCCESS !=
3051 ipc_object_copyin(thread->task->itk_space, voucher_name,
3052 MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
3053 thread->ith_voucher_name = MACH_PORT_NULL;
3054 thread_mtx_unlock(thread);
3055 *voucherp = IPC_VOUCHER_NULL;
3056 return KERN_SUCCESS;
3057 }
3058
3059 /* convert to a voucher ref to return, and cache a ref on thread */
3060 voucher = convert_port_to_voucher(port);
3061 ipc_voucher_reference(voucher);
3062 thread->ith_voucher = voucher;
3063 thread_mtx_unlock(thread);
3064
3065 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3066 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3067 (uintptr_t)thread_tid(thread),
3068 (uintptr_t)port,
3069 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3070 2, 0);
3071
3072
3073 ipc_port_release_send(port);
3074 } else
3075 thread_mtx_unlock(thread);
3076
3077 *voucherp = voucher;
3078 return KERN_SUCCESS;
3079 }
3080
3081 /*
3082 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
3083 *
3084 * Conditions: caller holds a reference on the voucher.
3085 * nothing locked.
3086 *
3087 * We grab another reference to the voucher and bind it to the thread. Any lazy
3088 * binding is erased. The old voucher reference associated with the thread is
3089 * discarded.
3090 */
3091 kern_return_t
3092 thread_set_mach_voucher(
3093 thread_t thread,
3094 ipc_voucher_t voucher)
3095 {
3096 ipc_voucher_t old_voucher;
3097 ledger_t bankledger = NULL;
3098 struct thread_group *banktg = NULL;
3099
3100 if (THREAD_NULL == thread)
3101 return KERN_INVALID_ARGUMENT;
3102
3103 if (thread != current_thread() && thread->started)
3104 return KERN_INVALID_ARGUMENT;
3105
3106 ipc_voucher_reference(voucher);
3107 bank_get_bank_ledger_and_thread_group(voucher, &bankledger, &banktg);
3108
3109 thread_mtx_lock(thread);
3110 old_voucher = thread->ith_voucher;
3111 thread->ith_voucher = voucher;
3112 thread->ith_voucher_name = MACH_PORT_NULL;
3113 thread_mtx_unlock(thread);
3114
3115 bank_swap_thread_bank_ledger(thread, bankledger);
3116
3117 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3118 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3119 (uintptr_t)thread_tid(thread),
3120 (uintptr_t)MACH_PORT_NULL,
3121 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3122 3, 0);
3123
3124 ipc_voucher_release(old_voucher);
3125
3126 return KERN_SUCCESS;
3127 }
3128
3129 /*
3130 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
3131 *
3132 * Conditions: caller holds a reference on the new and presumed old voucher(s).
3133 * nothing locked.
3134 *
3135 * This function is no longer supported.
3136 */
3137 kern_return_t
3138 thread_swap_mach_voucher(
3139 __unused thread_t thread,
3140 __unused ipc_voucher_t new_voucher,
3141 ipc_voucher_t *in_out_old_voucher)
3142 {
3143 /*
3144 * Currently this function is only called from a MIG generated
3145 * routine which doesn't release the reference on the voucher
3146 * addressed by in_out_old_voucher. To avoid leaking this reference,
3147 * a call to release it has been added here.
3148 */
3149 ipc_voucher_release(*in_out_old_voucher);
3150 return KERN_NOT_SUPPORTED;
3151 }
3152
3153 /*
3154 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
3155 */
3156 kern_return_t
3157 thread_get_current_voucher_origin_pid(
3158 int32_t *pid)
3159 {
3160 uint32_t buf_size;
3161 kern_return_t kr;
3162 thread_t thread = current_thread();
3163
3164 buf_size = sizeof(*pid);
3165 kr = mach_voucher_attr_command(thread->ith_voucher,
3166 MACH_VOUCHER_ATTR_KEY_BANK,
3167 BANK_ORIGINATOR_PID,
3168 NULL,
3169 0,
3170 (mach_voucher_attr_content_t)pid,
3171 &buf_size);
3172
3173 return kr;
3174 }
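
/*
 * Editorial note -- illustrative caller (sketch):
 *
 *	int32_t pid;
 *	if (thread_get_current_voucher_origin_pid(&pid) == KERN_SUCCESS)
 *		... pid identifies the originator recorded in the current voucher's bank attribute ...
 */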
3175
3176
3177 boolean_t
3178 thread_has_thread_name(thread_t th)
3179 {
3180 if ((th) && (th->uthread)) {
3181 return bsd_hasthreadname(th->uthread);
3182 }
3183
3184 /*
3185 * This is an odd case; clients may set the thread name based on the lack of
3186 * a name, but in this context there is no uthread to attach the name to.
3187 */
3188 return FALSE;
3189 }
3190
3191 void
3192 thread_set_thread_name(thread_t th, const char* name)
3193 {
3194 if ((th) && (th->uthread) && name) {
3195 bsd_setthreadname(th->uthread, name);
3196 }
3197 }
3198
3199 void
3200 thread_set_honor_qlimit(thread_t thread)
3201 {
3202 thread->options |= TH_OPT_HONOR_QLIMIT;
3203 }
3204
3205 void
3206 thread_clear_honor_qlimit(thread_t thread)
3207 {
3208 thread->options &= (~TH_OPT_HONOR_QLIMIT);
3209 }
3210
3211 /*
3212 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
3213 */
3214 void thread_enable_send_importance(thread_t thread, boolean_t enable)
3215 {
3216 if (enable == TRUE)
3217 thread->options |= TH_OPT_SEND_IMPORTANCE;
3218 else
3219 thread->options &= ~TH_OPT_SEND_IMPORTANCE;
3220 }
3221
3222 /*
3223 * thread_set_allocation_name - record an allocation-site name on the current thread's kernel state, returning the previously recorded name.
3224 */
3225
3226 kern_allocation_name_t thread_set_allocation_name(kern_allocation_name_t new_name)
3227 {
3228 kern_allocation_name_t ret;
3229 thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
3230 ret = kstate->allocation_name;
3231 // first-in wins: only install new_name when clearing (NULL) or when no name is currently recorded
3232 if (!new_name || !kstate->allocation_name) kstate->allocation_name = new_name;
3233 return ret;
3234 }
3235
3236 uint64_t
3237 thread_get_last_wait_duration(thread_t thread)
3238 {
3239 return thread->last_made_runnable_time - thread->last_run_time;
3240 }
3241
3242 #if CONFIG_DTRACE
3243 uint32_t dtrace_get_thread_predcache(thread_t thread)
3244 {
3245 if (thread != THREAD_NULL)
3246 return thread->t_dtrace_predcache;
3247 else
3248 return 0;
3249 }
3250
3251 int64_t dtrace_get_thread_vtime(thread_t thread)
3252 {
3253 if (thread != THREAD_NULL)
3254 return thread->t_dtrace_vtime;
3255 else
3256 return 0;
3257 }
3258
3259 int dtrace_get_thread_last_cpu_id(thread_t thread)
3260 {
3261 if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
3262 return thread->last_processor->cpu_id;
3263 } else {
3264 return -1;
3265 }
3266 }
3267
3268 int64_t dtrace_get_thread_tracing(thread_t thread)
3269 {
3270 if (thread != THREAD_NULL)
3271 return thread->t_dtrace_tracing;
3272 else
3273 return 0;
3274 }
3275
3276 boolean_t dtrace_get_thread_reentering(thread_t thread)
3277 {
3278 if (thread != THREAD_NULL)
3279 return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
3280 else
3281 return 0;
3282 }
3283
3284 vm_offset_t dtrace_get_kernel_stack(thread_t thread)
3285 {
3286 if (thread != THREAD_NULL)
3287 return thread->kernel_stack;
3288 else
3289 return 0;
3290 }
3291
3292 #if KASAN
3293 struct kasan_thread_data *
3294 kasan_get_thread_data(thread_t thread)
3295 {
3296 return &thread->kasan_data;
3297 }
3298 #endif
3299
3300 int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
3301 {
3302 if (thread != THREAD_NULL) {
3303 processor_t processor = current_processor();
3304 uint64_t abstime = mach_absolute_time();
3305 timer_t timer;
3306
3307 timer = PROCESSOR_DATA(processor, thread_timer);
3308
3309 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
3310 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
3311 } else
3312 return 0;
3313 }
3314
3315 void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
3316 {
3317 if (thread != THREAD_NULL)
3318 thread->t_dtrace_predcache = predcache;
3319 }
3320
3321 void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
3322 {
3323 if (thread != THREAD_NULL)
3324 thread->t_dtrace_vtime = vtime;
3325 }
3326
3327 void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
3328 {
3329 if (thread != THREAD_NULL)
3330 thread->t_dtrace_tracing = accum;
3331 }
3332
3333 void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
3334 {
3335 if (thread != THREAD_NULL) {
3336 if (vbool)
3337 thread->options |= TH_OPT_DTRACE;
3338 else
3339 thread->options &= (~TH_OPT_DTRACE);
3340 }
3341 }
3342
3343 vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
3344 {
3345 vm_offset_t prev = 0;
3346
3347 if (thread != THREAD_NULL) {
3348 prev = thread->recover;
3349 thread->recover = recover;
3350 }
3351 return prev;
3352 }
3353
3354 void dtrace_thread_bootstrap(void)
3355 {
3356 task_t task = current_task();
3357
3358 if (task->thread_count == 1) {
3359 thread_t thread = current_thread();
3360 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
3361 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
3362 DTRACE_PROC(exec__success);
3363 KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
3364 task_pid(task));
3365 }
3366 DTRACE_PROC(start);
3367 }
3368 DTRACE_PROC(lwp__start);
3369
3370 }
3371
3372 void
3373 dtrace_thread_didexec(thread_t thread)
3374 {
3375 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
3376 }
3377 #endif /* CONFIG_DTRACE */