/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub
 *
 *	Thread management primitives implementation.
 */
/*
 * Copyright (c) 1993 The University of Utah and
 * the Computer Systems Laboratory (CSL).  All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
 * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
 * improvements that they make and grant CSL redistribution rights.
 */
#include <mach/mach_types.h>
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/thread_info.h>
#include <mach/thread_special_ports.h>
#include <mach/thread_status.h>
#include <mach/time_value.h>
#include <mach/vm_param.h>

#include <machine/thread.h>
#include <machine/pal_routines.h>
#include <machine/limits.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <kern/counters.h>
#include <kern/extmod_statistics.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/sync_lock.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <kern/zalloc.h>
#include <kern/assert.h>
#include <kern/exc_resource.h>
#include <kern/telemetry.h>
#include <kern/policy_internal.h>

#include <corpses/task_corpse.h>

#include <kern/kpc.h>

#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <bank/bank_types.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>
#include <mach/sdt.h>

/*
 * Exported interfaces
 */
#include <mach/task_server.h>
#include <mach/thread_act_server.h>
#include <mach/mach_host_server.h>
#include <mach/host_priv_server.h>
#include <mach/mach_voucher_server.h>
#include <kern/policy_internal.h>
static struct zone		*thread_zone;
static lck_grp_attr_t		thread_lck_grp_attr;
lck_attr_t			thread_lck_attr;
lck_grp_t			thread_lck_grp;

struct zone			*thread_qos_override_zone;

decl_simple_lock_data(static, thread_stack_lock)
static queue_head_t		thread_stack_queue;

decl_simple_lock_data(static, thread_terminate_lock)
static queue_head_t		thread_terminate_queue;

static queue_head_t		crashed_threads_queue;

decl_simple_lock_data(static, thread_exception_lock)
static queue_head_t		thread_exception_queue;

struct thread_exception_elt {
	task_t		exception_task;
	thread_t	exception_thread;
};

static struct thread	thread_template, init_thread;

static void		sched_call_null(
				int		type,
				thread_t	thread);

#ifdef MACH_BSD
extern void proc_exit(void *);
extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
#endif /* MACH_BSD */

extern int disable_exc_resource;
extern int audio_active;
extern int debug_task;
int thread_max = CONFIG_THREAD_MAX;	/* Max number of threads */
int task_threadmax = CONFIG_THREAD_MAX;

static uint64_t		thread_unique_id = 100;

struct _thread_ledger_indices thread_ledgers = { -1 };
static ledger_template_t thread_ledger_template = NULL;
static void init_thread_ledgers(void);

void jetsam_on_ledger_cpulimit_exceeded(void);

/*
 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
 *
 * (i.e. when any thread's CPU consumption exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define	CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70

int cpumon_ustackshots_trigger_pct;	/* Percentage. Level at which we start gathering telemetry. */

void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);

/*
 * The smallest interval over which we support limiting CPU consumption is 1ms
 */
#define MINIMUM_CPULIMIT_INTERVAL_MS 1
void
thread_bootstrap(void)
{
	/*
	 *	Fill in a template thread for fast initialization.
	 */

#if MACH_ASSERT
	thread_template.thread_magic = THREAD_MAGIC;
#endif /* MACH_ASSERT */

	thread_template.runq = PROCESSOR_NULL;

	thread_template.ref_count = 2;

	thread_template.reason = AST_NONE;
	thread_template.at_safe_point = FALSE;
	thread_template.wait_event = NO_EVENT64;
	thread_template.waitq = NULL;
	thread_template.wait_result = THREAD_WAITING;
	thread_template.options = THREAD_ABORTSAFE;
	thread_template.state = TH_WAIT | TH_UNINT;
	thread_template.wake_active = FALSE;
	thread_template.continuation = THREAD_CONTINUE_NULL;
	thread_template.parameter = NULL;

	thread_template.importance = 0;
	thread_template.sched_mode = TH_MODE_NONE;
	thread_template.sched_flags = 0;
	thread_template.saved_mode = TH_MODE_NONE;
	thread_template.safe_release = 0;
	thread_template.th_sched_bucket = TH_BUCKET_RUN;

	thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
	thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;

	thread_template.active = 0;
	thread_template.started = 0;
	thread_template.static_param = 0;
	thread_template.policy_reset = 0;

	thread_template.base_pri = BASEPRI_DEFAULT;
	thread_template.sched_pri = 0;
	thread_template.max_priority = 0;
	thread_template.task_priority = 0;
	thread_template.promotions = 0;
	thread_template.pending_promoter_index = 0;
	thread_template.pending_promoter[0] = NULL;
	thread_template.pending_promoter[1] = NULL;
	thread_template.rwlock_count = 0;

	thread_template.realtime.deadline = UINT64_MAX;

	thread_template.quantum_remaining = 0;
	thread_template.last_run_time = 0;
	thread_template.last_made_runnable_time = 0;

	thread_template.computation_metered = 0;
	thread_template.computation_epoch = 0;

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	thread_template.sched_stamp = 0;
	thread_template.pri_shift = INT8_MAX;
	thread_template.sched_usage = 0;
	thread_template.cpu_usage = thread_template.cpu_delta = 0;
#endif
	thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;

	thread_template.bound_processor = PROCESSOR_NULL;
	thread_template.last_processor = PROCESSOR_NULL;

	thread_template.sched_call = sched_call_null;

	timer_init(&thread_template.user_timer);
	timer_init(&thread_template.system_timer);
	thread_template.user_timer_save = 0;
	thread_template.system_timer_save = 0;
	thread_template.vtimer_user_save = 0;
	thread_template.vtimer_prof_save = 0;
	thread_template.vtimer_rlim_save = 0;
	thread_template.vtimer_qos_save = 0;

#if CONFIG_SCHED_SFI
	thread_template.wait_sfi_begin_time = 0;
#endif

	thread_template.wait_timer_is_set = FALSE;
	thread_template.wait_timer_active = 0;

	thread_template.depress_timer_active = 0;

	thread_template.recover = (vm_offset_t)NULL;

	thread_template.map = VM_MAP_NULL;

#if CONFIG_DTRACE
	thread_template.t_dtrace_predcache = 0;
	thread_template.t_dtrace_vtime = 0;
	thread_template.t_dtrace_tracing = 0;
#endif /* CONFIG_DTRACE */

#if KPERF
	thread_template.kperf_flags = 0;
	thread_template.kperf_pet_gen = 0;
	thread_template.kperf_c_switch = 0;
	thread_template.kperf_pet_cnt = 0;
#endif

#if KPC
	thread_template.kpc_buf = NULL;
#endif

#if HYPERVISOR
	thread_template.hv_thread_target = NULL;
#endif /* HYPERVISOR */

#if (DEVELOPMENT || DEBUG)
	thread_template.t_page_creation_throttled_hard = 0;
	thread_template.t_page_creation_throttled_soft = 0;
#endif /* DEVELOPMENT || DEBUG */
	thread_template.t_page_creation_throttled = 0;
	thread_template.t_page_creation_count = 0;
	thread_template.t_page_creation_time = 0;

	thread_template.affinity_set = NULL;

	thread_template.syscalls_unix = 0;
	thread_template.syscalls_mach = 0;

	thread_template.t_ledger = LEDGER_NULL;
	thread_template.t_threadledger = LEDGER_NULL;

	thread_template.t_bankledger = LEDGER_NULL;
	thread_template.t_deduct_bank_ledger_time = 0;

	thread_template.requested_policy = (struct thread_requested_policy) {};
	thread_template.effective_policy = (struct thread_effective_policy) {};

	bzero(&thread_template.overrides, sizeof(thread_template.overrides));

	thread_template.iotier_override = THROTTLE_LEVEL_NONE;
	thread_template.thread_io_stats = NULL;
	thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;

	thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
	thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;

	thread_template.thread_tag = 0;

	thread_template.ith_voucher_name = MACH_PORT_NULL;
	thread_template.ith_voucher = IPC_VOUCHER_NULL;

	thread_template.work_interval_id = 0;

	init_thread = thread_template;
	machine_set_current_thread(&init_thread);
}
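/*
 * Illustrative note (not from the original source): the template built above
 * is consumed by thread_create_internal() below with a plain struct copy,
 * which is why thread_bootstrap() only has to run once at boot:
 *
 *	new_thread = (thread_t)zalloc(thread_zone);
 *	*new_thread = thread_template;	// every field gets a sane default
 *
 * Any field not explicitly overwritten afterwards keeps the value set here.
 */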
extern boolean_t allow_qos_policy_set;

void
thread_init(void)
{
	thread_zone = zinit(
			sizeof(struct thread),
			thread_max * sizeof(struct thread),
			THREAD_CHUNK * sizeof(struct thread),
			"threads");

	thread_qos_override_zone = zinit(
		sizeof(struct thread_qos_override),
		4 * thread_max * sizeof(struct thread_qos_override),
		PAGE_SIZE,
		"thread qos override");
	zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
	zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
	zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
	zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);

	lck_grp_attr_setdefault(&thread_lck_grp_attr);
	lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
	lck_attr_setdefault(&thread_lck_attr);

	stack_init();

	thread_policy_init();

	/*
	 *	Initialize any machine-dependent
	 *	per-thread structures necessary.
	 */
	machine_thread_init();

	if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
		sizeof (cpumon_ustackshots_trigger_pct))) {
		cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
	}

	PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));

	init_thread_ledgers();
}
boolean_t
thread_is_active(thread_t thread)
{
	return (thread->active);
}

void
thread_corpse_continue(void)
{
	thread_t thread = current_thread();

	thread_terminate_internal(thread);
	ml_set_interrupts_enabled(FALSE);
	ast_taken(AST_APC, TRUE);

	panic("thread_corpse_continue");
	/*NOTREACHED*/
}

static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}
/*
 *	thread_terminate_self:
 */
void
thread_terminate_self(void)
{
	thread_t		thread = current_thread();
	task_t			task;
	spl_t			s;
	int			threadcnt;

	pal_thread_terminate_self(thread);

	DTRACE_PROC(lwp__exit);

	thread_mtx_lock(thread);

	ipc_thread_disable(thread);

	thread_mtx_unlock(thread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel priority depression, wait for concurrent expirations
	 *	on other processors.
	 */
	if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

		/* If our priority was low because of a depressed yield, restore it in case we block below */
		thread_recompute_sched_pri(thread, FALSE);

		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}

	while (thread->depress_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	thread_sched_call(thread, NULL);

	thread_unlock(thread);
	splx(s);

	thread_mtx_lock(thread);

	thread_policy_reset(thread);

	thread_mtx_unlock(thread);

	task = thread->task;
	uthread_cleanup(task, thread->uthread, task->bsd_info);
	threadcnt = hw_atomic_sub(&task->active_thread_count, 1);

	if (task->bsd_info && !task_is_exec_copy(task)) {
		/* trace out pid before we sign off */
		long	dbg_arg1 = 0;

		kdbg_trace_data(thread->task->bsd_info, &dbg_arg1);

		KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE_PID | DBG_FUNC_NONE,
				      dbg_arg1, 0, 0, 0, 0);
	}

	/*
	 * If we are the last thread to terminate and the task is
	 * associated with a BSD process, perform BSD process exit.
	 */
	if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
		mach_exception_data_type_t subcode = 0;

		/* since we're the last thread in this process, trace out the command name too */
		long	dbg_arg1 = 0, dbg_arg2 = 0, dbg_arg3 = 0, dbg_arg4 = 0;

		kdbg_trace_string(thread->task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT(TRACE_STRING_PROC_EXIT | DBG_FUNC_NONE,
				      dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);

		/* Get the exit reason before proc_exit */
		subcode = proc_encode_exit_exception_code(task->bsd_info);
		proc_exit(task->bsd_info);

		/*
		 * if there is crash info in task
		 * then do the deliver action since this is
		 * last thread for this task.
		 */
		if (task->corpse_info) {
			task_deliver_crash_notification(task, current_thread(), subcode);
		}
	}

	if (threadcnt == 0) {
		task_lock(task);
		if (task_is_a_corpse_fork(task)) {
			thread_wakeup((event_t)&task->active_thread_count);
		}
		task_unlock(task);
	}

	uthread_cred_free(thread->uthread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel wait timer, and wait for
	 *	concurrent expirations.
	 */
	if (thread->wait_timer_is_set) {
		thread->wait_timer_is_set = FALSE;

		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
	}

	while (thread->wait_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	/*
	 *	If there is a reserved stack, release it.
	 */
	if (thread->reserved_stack != 0) {
		stack_free_reserved(thread);
		thread->reserved_stack = 0;
	}

	/*
	 *	Mark thread as terminating, and block.
	 */
	thread->state |= TH_TERMINATE;
	thread_mark_wait_locked(thread, THREAD_UNINT);
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
	assert(thread->promotions == 0);
	assert(!(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED));
	assert(thread->rwlock_count == 0);
	thread_unlock(thread);
	splx(s);

	thread_block((thread_continue_t)thread_terminate_continue);
	/*NOTREACHED*/
}
/* Drop a thread refcount that definitely isn't the last one. */
void
thread_deallocate_safe(thread_t thread)
{
	assert_thread_magic(thread);

	uint32_t old_refcount = hw_atomic_sub(&(thread)->ref_count, 1) + 1;

	if (__improbable(old_refcount <= 1))
		panic("bad thread refcount: %d", old_refcount);
}
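/*
 * Usage sketch (illustrative, not part of the original file): prefer
 * thread_deallocate_safe() over thread_deallocate() when the caller knows
 * its reference cannot be the last one, e.g. a short-lived extra ref:
 *
 *	thread_reference(thread);	// take a ref for a short operation
 *	...				// use the thread
 *	thread_deallocate_safe(thread);	// drop it; never the final release
 */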
void
thread_deallocate(
	thread_t			thread)
{
	task_t				task;

	if (thread == THREAD_NULL)
		return;

	assert_thread_magic(thread);
	assert(thread->ref_count > 0);

	if (__probable(hw_atomic_sub(&(thread)->ref_count, 1) > 0))
		return;

	if (!(thread->state & TH_TERMINATE2))
		panic("thread_deallocate: thread not properly terminated\n");

	assert(thread->runq == PROCESSOR_NULL);

	assert(thread->user_promotions == 0);

#if KPC
	kpc_thread_destroy(thread);
#endif

	ipc_thread_terminate(thread);

	proc_thread_qos_deallocate(thread);

	task = thread->task;

#ifdef MACH_BSD
	{
		void *ut = thread->uthread;

		thread->uthread = NULL;
		uthread_zone_free(ut);
	}
#endif /* MACH_BSD */

	if (thread->t_ledger)
		ledger_dereference(thread->t_ledger);
	if (thread->t_threadledger)
		ledger_dereference(thread->t_threadledger);

	if (IPC_VOUCHER_NULL != thread->ith_voucher)
		ipc_voucher_release(thread->ith_voucher);

	if (thread->thread_io_stats)
		kfree(thread->thread_io_stats, sizeof(struct io_stat_info));

	if (thread->kernel_stack != 0)
		stack_free(thread);

	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	machine_thread_destroy(thread);

	task_deallocate(task);

#if MACH_ASSERT
	assert_thread_magic(thread);
	thread->thread_magic = 0;
#endif /* MACH_ASSERT */

	zfree(thread_zone, thread);
}
/*
 *	thread_exception_daemon:
 *
 *	Deliver EXC_RESOURCE exception
 */
static void
thread_exception_daemon(void)
{
	struct thread_exception_elt *elt;
	task_t task;
	thread_t thread;

	simple_lock(&thread_exception_lock);

	while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
		simple_unlock(&thread_exception_lock);

		task = elt->exception_task;
		thread = elt->exception_thread;
		assert_thread_magic(thread);

		kfree(elt, sizeof(struct thread_exception_elt));

		/* wait for all the threads in the task to terminate */
		task_lock(task);
		task_wait_till_threads_terminate_locked(task);
		task_unlock(task);

		/* Consumes the task ref returned by task_generate_corpse_internal */
		task_deallocate(task);
		/* Consumes the thread ref returned by task_generate_corpse_internal */
		thread_deallocate(thread);

		/* Deliver the EXC_RESOURCE notification, also clears the corpse. */
		task_deliver_crash_notification(task, thread, 0);

		simple_lock(&thread_exception_lock);
	}

	assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
	simple_unlock(&thread_exception_lock);

	thread_block((thread_continue_t)thread_exception_daemon);
}
/*
 *	thread_exception_enqueue:
 *
 *	Enqueue a corpse port to be delivered an EXC_RESOURCE.
 */
void
thread_exception_enqueue(
	task_t		task,
	thread_t	thread)
{
	struct thread_exception_elt *elt = (struct thread_exception_elt *) kalloc(
			sizeof(struct thread_exception_elt));

	elt->exception_task = task;
	elt->exception_thread = thread;

	simple_lock(&thread_exception_lock);
	enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
	simple_unlock(&thread_exception_lock);

	thread_wakeup((event_t)&thread_exception_queue);
}
/*
 * thread_copy_resource_info
 *
 * Copy the resource info counters from source
 * thread to destination thread.
 */
void
thread_copy_resource_info(
	thread_t dst_thread,
	thread_t src_thread)
{
	dst_thread->thread_tag = src_thread->thread_tag;
	dst_thread->c_switch = src_thread->c_switch;
	dst_thread->p_switch = src_thread->p_switch;
	dst_thread->ps_switch = src_thread->ps_switch;
	dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
	dst_thread->user_timer = src_thread->user_timer;
	dst_thread->user_timer_save = src_thread->user_timer_save;
	dst_thread->system_timer_save = src_thread->system_timer_save;
	dst_thread->syscalls_unix = src_thread->syscalls_unix;
	dst_thread->syscalls_mach = src_thread->syscalls_mach;
	ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
	*dst_thread->thread_io_stats = *src_thread->thread_io_stats;
}
/*
 *	thread_terminate_daemon:
 *
 *	Perform final clean up for terminating threads.
 */
static void
thread_terminate_daemon(void)
{
	thread_t	self, thread;
	task_t		task;

	self = current_thread();
	self->options |= TH_OPT_SYSTEM_CRITICAL;

	(void)splsched();
	simple_lock(&thread_terminate_lock);

	while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		/*
		 * if marked for crash reporting, skip reaping.
		 * The corpse delivery thread will clear the bit and enqueue
		 * for reaping when done
		 */
		if (thread->inspection) {
			enqueue_tail(&crashed_threads_queue, &thread->runq_links);
			continue;
		}

		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		task = thread->task;

		task_lock(task);
		task->total_user_time += timer_grab(&thread->user_timer);
		if (thread->precise_user_kernel_time) {
			task->total_system_time += timer_grab(&thread->system_timer);
		} else {
			task->total_user_time += timer_grab(&thread->system_timer);
		}

		task->c_switch += thread->c_switch;
		task->p_switch += thread->p_switch;
		task->ps_switch += thread->ps_switch;

		task->syscalls_unix += thread->syscalls_unix;
		task->syscalls_mach += thread->syscalls_mach;

		task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
		task->task_gpu_ns += ml_gpu_stat(thread);
		task->task_energy += ml_energy_stat(thread);

		thread_update_qos_cpu_time(thread);

		queue_remove(&task->threads, thread, thread_t, task_threads);
		task->thread_count--;

		/*
		 * If the task is being halted, and there is only one thread
		 * left in the task after this one, then wakeup that thread.
		 */
		if (task->thread_count == 1 && task->halting)
			thread_wakeup((event_t)&task->halting);

		task_unlock(task);

		lck_mtx_lock(&tasks_threads_lock);
		queue_remove(&threads, thread, thread_t, threads);
		threads_count--;
		lck_mtx_unlock(&tasks_threads_lock);

		thread_deallocate(thread);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
	simple_unlock(&thread_terminate_lock);
	/* splsched */

	self->options &= ~TH_OPT_SYSTEM_CRITICAL;
	thread_block((thread_continue_t)thread_terminate_daemon);
	/*NOTREACHED*/
}
/*
 *	thread_terminate_enqueue:
 *
 *	Enqueue a terminating thread for final disposition.
 *
 *	Called at splsched.
 */
void
thread_terminate_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE | DBG_FUNC_NONE, thread->thread_id, 0, 0, 0, 0);

	simple_lock(&thread_terminate_lock);
	enqueue_tail(&thread_terminate_queue, &thread->runq_links);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
}
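/*
 * Sketch of the producer/consumer handoff used by the queues in this file
 * (illustrative, not from the original source).  Producers run at splsched,
 * link the thread onto a queue under a simple lock, and wake the daemon;
 * the daemon dequeues under the same lock and drops it while it works:
 *
 *	simple_lock(&q_lock);
 *	enqueue_tail(&q, &thread->runq_links);
 *	simple_unlock(&q_lock);
 *	thread_wakeup((event_t)&q);	// daemon did assert_wait() on &q
 */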
/*
 *	thread_terminate_crashed_threads:
 *	Walk the list of crashed threads and move any that are no longer
 *	being inspected back onto the terminate queue.
 */
void
thread_terminate_crashed_threads()
{
	thread_t th_remove;
	boolean_t should_wake_terminate_queue = FALSE;

	simple_lock(&thread_terminate_lock);
	/*
	 * Loop through the crashed threads queue
	 * to requeue any threads that are not being inspected anymore.
	 */
	qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
		/* make sure current_thread is never in crashed queue */
		assert(th_remove != current_thread());

		if (th_remove->inspection == FALSE) {
			re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
			should_wake_terminate_queue = TRUE;
		}
	}

	simple_unlock(&thread_terminate_lock);
	if (should_wake_terminate_queue == TRUE) {
		thread_wakeup((event_t)&thread_terminate_queue);
	}
}
/*
 *	thread_stack_daemon:
 *
 *	Perform stack allocation as required due to
 *	invoke failures.
 */
static void
thread_stack_daemon(void)
{
	thread_t		thread;
	spl_t			s;

	s = splsched();
	simple_lock(&thread_stack_lock);

	while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		simple_unlock(&thread_stack_lock);
		splx(s);

		/* allocate stack with interrupts enabled so that we can call into VM */
		stack_alloc(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);

		s = splsched();
		thread_lock(thread);
		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
		thread_unlock(thread);

		simple_lock(&thread_stack_lock);
	}

	assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
	simple_unlock(&thread_stack_lock);
	splx(s);

	thread_block((thread_continue_t)thread_stack_daemon);
}
/*
 *	thread_stack_enqueue:
 *
 *	Enqueue a thread for stack allocation.
 *
 *	Called at splsched.
 */
void
thread_stack_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
	assert_thread_magic(thread);

	simple_lock(&thread_stack_lock);
	enqueue_tail(&thread_stack_queue, &thread->runq_links);
	simple_unlock(&thread_stack_lock);

	thread_wakeup((event_t)&thread_stack_queue);
}
void
thread_daemon_init(void)
{
	kern_return_t	result;
	thread_t	thread = NULL;

	simple_lock_init(&thread_terminate_lock, 0);
	queue_init(&thread_terminate_queue);
	queue_init(&crashed_threads_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_terminate_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_stack_lock, 0);
	queue_init(&thread_stack_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_stack_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_exception_lock, 0);
	queue_init(&thread_exception_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_exception_daemon");

	thread_deallocate(thread);
}
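/*
 * Note (illustrative, not from the original source): each daemon started
 * above is self-sufficient once running.  The start routine hands back a
 * reference on the new kernel thread, and since the daemons loop forever,
 * thread_daemon_init() drops that reference immediately:
 *
 *	kernel_thread_start_priority(continuation, NULL, pri, &thread);
 *	thread_deallocate(thread);	// daemon keeps running regardless
 */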
#define TH_OPTION_NONE		0x00
#define TH_OPTION_NOCRED	0x01
#define TH_OPTION_NOSUSP	0x02

/*
 * Create a new thread.
 * Doesn't start the thread running.
 *
 * Task and tasks_threads_lock are returned locked on success.
 */
static kern_return_t
thread_create_internal(
	task_t				parent_task,
	integer_t			priority,
	thread_continue_t		continuation,
	int				options,
	thread_t			*out_thread)
{
	thread_t			new_thread;
	static thread_t			first_thread;

	/*
	 *	Allocate a thread and initialize static fields
	 */
	if (first_thread == THREAD_NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == THREAD_NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
	new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
	if (new_thread->uthread == NULL) {
#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_RESOURCE_SHORTAGE);
	}
#endif /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		void *ut = new_thread->uthread;

		new_thread->uthread = NULL;
		/* cred free may not be necessary */
		uthread_cleanup(parent_task, ut, parent_task->bsd_info);
		uthread_cred_free(ut);
		uthread_zone_free(ut);
#endif /* MACH_BSD */

#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);

	ipc_thread_init(new_thread);

	new_thread->continuation = continuation;

	/* Allocate I/O Statistics structure */
	new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
	assert(new_thread->thread_io_stats != NULL);
	bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));

#if CONFIG_IOSCHED
	/* Clear out the I/O Scheduling info for AppleFSCompression */
	new_thread->decmp_upl = NULL;
#endif /* CONFIG_IOSCHED */

	lck_mtx_lock(&tasks_threads_lock);
	task_lock(parent_task);

	/*
	 * Fail thread creation if parent task is being torn down or has too many threads
	 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
	 */
	if (parent_task->active == 0 || parent_task->halting ||
	    (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
	    (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
		task_unlock(parent_task);
		lck_mtx_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
		lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	/* New threads inherit any default state on the task */
	machine_thread_inherit_taskwide(new_thread, parent_task);

	task_reference_internal(parent_task);

	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		/*
		 * This task has a per-thread CPU limit; make sure this new thread
		 * gets its limit set too, before it gets out of the kernel.
		 */
		set_astledger(new_thread);
	}

	/* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
	if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
				LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
		ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
	}

	new_thread->t_bankledger = LEDGER_NULL;
	new_thread->t_deduct_bank_ledger_time = 0;

	new_thread->t_ledger = new_thread->task->ledger;
	if (new_thread->t_ledger)
		ledger_reference(new_thread->t_ledger);

#if defined(CONFIG_SCHED_MULTIQ)
	/* Cache the task's sched_group */
	new_thread->sched_group = parent_task->sched_group;
#endif /* defined(CONFIG_SCHED_MULTIQ) */

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

#if KPC
	kpc_thread_create(new_thread);
#endif

	/* Set the thread's scheduling parameters */
	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;

	int new_priority = (priority < 0) ? parent_task->priority : priority;
	if (new_priority > new_thread->max_priority)
		new_priority = new_thread->max_priority;

	new_thread->importance = new_priority - new_thread->task_priority;

	sched_set_thread_base_priority(new_thread, new_priority);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */

	thread_policy_create(new_thread);

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;

	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	/* Protected by the tasks_threads_lock */
	new_thread->thread_id = ++thread_unique_id;

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	new_thread->active = TRUE;
	if (task_is_a_corpse_fork(parent_task)) {
		/* Set the inspection bit if the task is a corpse fork */
		new_thread->inspection = TRUE;
	} else {
		new_thread->inspection = FALSE;
	}
	new_thread->corpse_dup = FALSE;
	*out_thread = new_thread;

	{
		long	dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

		/*
		 * Starting with 26604425, exec'ing creates a new task/thread.
		 *
		 * NEWTHREAD in the current process has two possible meanings:
		 *
		 * 1) Create a new thread for this process.
		 * 2) Create a new thread for the future process this will become in an exec.
		 *
		 * To disambiguate these, arg3 will be set to TRUE for case #2.
		 *
		 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
		 * task exec'ing. The read of t_procflags does not take the proc_lock.
		 */
		dbg_arg3 = (task_is_exec_copy(parent_task)) ? TRUE : 0;

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_DATA_NEWTHREAD | DBG_FUNC_NONE,
			(vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, dbg_arg3, 0, 0);

		kdbg_trace_string(parent_task->bsd_info,
			&dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_STRING_NEWTHREAD | DBG_FUNC_NONE,
			dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}
static kern_return_t
thread_create_internal2(
	task_t				task,
	thread_t			*new_thread,
	boolean_t			from_user,
	thread_continue_t		continuation)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}
/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread);

kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_from_user(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_with_continuation(
	task_t				task,
	thread_t			*new_thread,
	thread_continue_t		continuation)
{
	return thread_create_internal2(task, new_thread, FALSE, continuation);
}
/*
 * Create a thread that is already started, but is waiting on an event
 */
static kern_return_t
thread_create_waiting_internal(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	int			options,
	thread_t		*new_thread)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, options, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	/* note no user_stop_count or thread_hold here */

	if (task->suspend_count > 0)
		thread_hold(thread);

	thread_mtx_lock(thread);
	thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
	thread_mtx_unlock(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

kern_return_t
thread_create_waiting(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	thread_t		*new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
					      TH_OPTION_NONE, new_thread);
}
static kern_return_t
thread_create_running_internal2(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread,
	boolean_t			from_user)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	if (task->suspend_count > 0)
		thread_hold(thread);

	result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
	if (result != KERN_SUCCESS) {
		task_unlock(task);
		lck_mtx_unlock(&tasks_threads_lock);

		thread_terminate(thread);
		thread_deallocate(thread);
		return (result);
	}

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

/* Prototype, see justification above */
kern_return_t
thread_create_running(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread);

kern_return_t
thread_create_running(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, FALSE);
}

kern_return_t
thread_create_running_from_user(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, TRUE);
}
kern_return_t
thread_create_workq(
	task_t				task,
	thread_continue_t		thread_return,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

kern_return_t
thread_create_workq_waiting(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	thread_t		*new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
					      TH_OPTION_NOCRED | TH_OPTION_NOSUSP,
					      new_thread);
}
/*
 *	kernel_thread_create:
 *
 *	Create a thread in the kernel task
 *	to execute in kernel context.
 */
kern_return_t
kernel_thread_create(
	thread_continue_t	continuation,
	void			*parameter,
	integer_t		priority,
	thread_t		*new_thread)
{
	kern_return_t		result;
	thread_t		thread;
	task_t			task = kernel_task;

	result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	stack_alloc(thread);
	assert(thread->kernel_stack != 0);
	thread->reserved_stack = thread->kernel_stack;

	thread->parameter = parameter;

	if (debug_task & 1)
		kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
	*new_thread = thread;

	return (result);
}

kern_return_t
kernel_thread_start_priority(
	thread_continue_t	continuation,
	void			*parameter,
	integer_t		priority,
	thread_t		*new_thread)
{
	kern_return_t	result;
	thread_t	thread;

	result = kernel_thread_create(continuation, parameter, priority, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	*new_thread = thread;

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	return (result);
}

kern_return_t
kernel_thread_start(
	thread_continue_t	continuation,
	void			*parameter,
	thread_t		*new_thread)
{
	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
}
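/*
 * Usage sketch for kernel_thread_start() (illustrative, not part of the
 * original file; my_worker is a hypothetical continuation):
 *
 *	static void
 *	my_worker(void *parameter, wait_result_t wresult)
 *	{
 *		// runs on its own kernel stack; never returns to the caller
 *	}
 *
 *	thread_t th;
 *	if (kernel_thread_start(my_worker, NULL, &th) == KERN_SUCCESS)
 *		thread_deallocate(th);	// drop the ref the start routine returned
 */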
/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
/* it is assumed that the thread is locked by the caller */
static void
retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
{
	int	state, flags;

	/* fill in info */

	thread_read_times(thread, &basic_info->user_time,
			  &basic_info->system_time);

	/*
	 *	Update lazy-evaluated scheduler info because someone wants it.
	 */
	if (SCHED(can_update_priority)(thread))
		SCHED(update_priority)(thread);

	basic_info->sleep_time = 0;

	/*
	 *	To calculate cpu_usage, first correct for timer rate,
	 *	then for 5/8 ageing.  The correction factor [3/5] is
	 *	(1/(5/8) - 1).
	 */
	basic_info->cpu_usage = 0;
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	if (sched_tick_interval) {
		basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
					* TH_USAGE_SCALE) / sched_tick_interval);
		basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
	}
#endif

	if (basic_info->cpu_usage > TH_USAGE_SCALE)
		basic_info->cpu_usage = TH_USAGE_SCALE;

	basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
				POLICY_TIMESHARE: POLICY_RR);

	flags = 0;
	if (thread->options & TH_OPT_IDLE_THREAD)
		flags |= TH_FLAGS_IDLE;

	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
		flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
	}

	if (!thread->kernel_stack)
		flags |= TH_FLAGS_SWAPPED;

	state = 0;
	if (thread->state & TH_TERMINATE)
		state = TH_STATE_HALTED;
	else
	if (thread->state & TH_RUN)
		state = TH_STATE_RUNNING;
	else
	if (thread->state & TH_UNINT)
		state = TH_STATE_UNINTERRUPTIBLE;
	else
	if (thread->state & TH_SUSP)
		state = TH_STATE_STOPPED;
	else
	if (thread->state & TH_WAIT)
		state = TH_STATE_WAITING;

	basic_info->run_state = state;
	basic_info->flags = flags;

	basic_info->suspend_count = thread->user_stop_count;

	return;
}
kern_return_t
thread_info_internal(
	thread_t		thread,
	thread_flavor_t		flavor,
	thread_info_t		thread_info_out,	/* ptr to OUT array */
	mach_msg_type_number_t	*thread_info_count)	/*IN/OUT*/
{
	spl_t	s;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (flavor == THREAD_BASIC_INFO) {

		if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		s = splsched();
		thread_lock(thread);

		retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_BASIC_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_IDENTIFIER_INFO) {
		thread_identifier_info_t	identifier_info;

		if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		identifier_info = (thread_identifier_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		identifier_info->thread_id = thread->thread_id;
		identifier_info->thread_handle = thread->machine.cthread_self;
		identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);

		thread_unlock(thread);
		splx(s);
		return KERN_SUCCESS;
	}
	else
	if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
		policy_timeshare_info_t		ts_info;

		if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		ts_info = (policy_timeshare_info_t)thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode != TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);
			return (KERN_INVALID_POLICY);
		}

		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (ts_info->depressed) {
			ts_info->base_priority = DEPRESSPRI;
			ts_info->depress_priority = thread->base_pri;
		}
		else {
			ts_info->base_priority = thread->base_pri;
			ts_info->depress_priority = -1;
		}

		ts_info->cur_priority = thread->sched_pri;
		ts_info->max_priority = thread->max_priority;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_SCHED_FIFO_INFO) {
		if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		return (KERN_INVALID_POLICY);
	}
	else
	if (flavor == THREAD_SCHED_RR_INFO) {
		policy_rr_info_t	rr_info;
		uint32_t		quantum_time;
		uint64_t		quantum_ns;

		if (*thread_info_count < POLICY_RR_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		rr_info = (policy_rr_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return (KERN_INVALID_POLICY);
		}

		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (rr_info->depressed) {
			rr_info->base_priority = DEPRESSPRI;
			rr_info->depress_priority = thread->base_pri;
		}
		else {
			rr_info->base_priority = thread->base_pri;
			rr_info->depress_priority = -1;
		}

		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_info->max_priority = thread->max_priority;
		rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_RR_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_EXTENDED_INFO) {
		thread_basic_info_data_t	basic_info;
		thread_extended_info_t		extended_info = (thread_extended_info_t) thread_info_out;

		if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
			return (KERN_INVALID_ARGUMENT);
		}

		s = splsched();
		thread_lock(thread);

		/* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
		 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
		 */
		retrieve_thread_basic_info(thread, &basic_info);
		extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
		extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));

		extended_info->pth_cpu_usage = basic_info.cpu_usage;
		extended_info->pth_policy = basic_info.policy;
		extended_info->pth_run_state = basic_info.run_state;
		extended_info->pth_flags = basic_info.flags;
		extended_info->pth_sleep_time = basic_info.sleep_time;
		extended_info->pth_curpri = thread->sched_pri;
		extended_info->pth_priority = thread->base_pri;
		extended_info->pth_maxpriority = thread->max_priority;

		bsd_getthreadname(thread->uthread, extended_info->pth_name);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_EXTENDED_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
#if DEVELOPMENT || DEBUG
		thread_debug_info_internal_t dbg_info;
		if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
			return (KERN_NOT_SUPPORTED);

		if (thread_info_out == NULL)
			return (KERN_INVALID_ARGUMENT);

		dbg_info = (thread_debug_info_internal_t) thread_info_out;
		dbg_info->page_creation_count = thread->t_page_creation_count;

		*thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
		return (KERN_SUCCESS);
#endif /* DEVELOPMENT || DEBUG */
		return (KERN_NOT_SUPPORTED);
	}

	return (KERN_INVALID_ARGUMENT);
}
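/*
 * Caller-side sketch for thread_info_internal() via the thread_info() MIG
 * interface (illustrative, not part of the original file):
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *
 *	kern_return_t kr = thread_info(thread_port, THREAD_BASIC_INFO,
 *				       (thread_info_t)&info, &count);
 *	// on success, info.cpu_usage is scaled so that TH_USAGE_SCALE
 *	// represents 100% of one CPU
 */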
void
thread_read_times(
	thread_t	thread,
	time_value_t	*user_time,
	time_value_t	*system_time)
{
	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t	tval_user, tval_system;

	tval_user = timer_grab(&thread->user_timer);
	tval_system = timer_grab(&thread->system_timer);

	if (thread->precise_user_kernel_time) {
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		absolutetime_to_microtime(tval_system, &secs, &usecs);
		system_time->seconds = (typeof(system_time->seconds))secs;
		system_time->microseconds = usecs;
	} else {
		/* system_timer may represent either sys or user */
		tval_user += tval_system;
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		system_time->seconds = 0;
		system_time->microseconds = 0;
	}
}
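/*
 * Worked example (illustrative, not from the original source): collapsing
 * the two time_value_t results into one microsecond total, as a caller of
 * thread_read_times() might:
 *
 *	time_value_t user, sys;
 *	thread_read_times(thread, &user, &sys);
 *	uint64_t total_us = (uint64_t)user.seconds * USEC_PER_SEC
 *			  + user.microseconds
 *			  + (uint64_t)sys.seconds * USEC_PER_SEC
 *			  + sys.microseconds;
 */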
uint64_t thread_get_runtime_self(void)
{
	boolean_t interrupt_state;
	uint64_t runtime;
	thread_t thread = NULL;
	processor_t processor = NULL;

	thread = current_thread();

	/* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
	interrupt_state = ml_set_interrupts_enabled(FALSE);
	processor = current_processor();
	timer_switch(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time(), PROCESSOR_DATA(processor, thread_timer));
	runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
	ml_set_interrupts_enabled(interrupt_state);

	return runtime;
}
kern_return_t
thread_assign(
	__unused thread_t		thread,
	__unused processor_set_t	new_pset)
{
	return (KERN_FAILURE);
}

/*
 *	thread_assign_default:
 *
 *	Special version of thread_assign for assigning threads to default
 *	processor set.
 */
kern_return_t
thread_assign_default(
	thread_t		thread)
{
	return (thread_assign(thread, &pset0));
}

/*
 *	thread_get_assignment
 *
 *	Return current assignment for this thread.
 */
kern_return_t
thread_get_assignment(
	thread_t		thread,
	processor_set_t		*pset)
{
	if (thread == NULL)
		return (KERN_INVALID_ARGUMENT);

	*pset = &pset0;

	return (KERN_SUCCESS);
}
/*
 *	thread_wire_internal:
 *
 *	Specify that the target thread must always be able
 *	to run and to allocate memory.
 */
kern_return_t
thread_wire_internal(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired,
	boolean_t	*prev_state)
{
	if (host_priv == NULL || thread != current_thread())
		return (KERN_INVALID_ARGUMENT);

	assert(host_priv == &realhost);

	if (prev_state)
		*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;

	if (wired) {
		if (!(thread->options & TH_OPT_VMPRIV))
			vm_page_free_reserve(1);	/* XXX */
		thread->options |= TH_OPT_VMPRIV;
	}
	else {
		if (thread->options & TH_OPT_VMPRIV)
			vm_page_free_reserve(-1);	/* XXX */
		thread->options &= ~TH_OPT_VMPRIV;
	}

	return (KERN_SUCCESS);
}

/*
 *	thread_wire:
 *
 *	User-api wrapper for thread_wire_internal()
 */
kern_return_t
thread_wire(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired)
{
	return (thread_wire_internal(host_priv, thread, wired, NULL));
}
boolean_t
is_vm_privileged(void)
{
	return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
}

boolean_t
set_vm_privilege(boolean_t privileged)
{
	boolean_t was_vmpriv;

	if (current_thread()->options & TH_OPT_VMPRIV)
		was_vmpriv = TRUE;
	else
		was_vmpriv = FALSE;

	if (privileged != FALSE)
		current_thread()->options |= TH_OPT_VMPRIV;
	else
		current_thread()->options &= ~TH_OPT_VMPRIV;

	return (was_vmpriv);
}
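/*
 * Usage sketch (illustrative, not part of the original file): callers that
 * need temporary VM privilege save and restore the previous state so that
 * nesting works:
 *
 *	boolean_t was_vmpriv = set_vm_privilege(TRUE);
 *	...	// allocate from the reserved pool, etc.
 *	set_vm_privilege(was_vmpriv);
 */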
void
set_thread_rwlock_boost(void)
{
	current_thread()->rwlock_count++;
}

void
clear_thread_rwlock_boost(void)
{
	thread_t thread = current_thread();

	if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {

		lck_rw_clear_promotion(thread);
	}
}
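/*
 * Pairing sketch (illustrative, not from the original source): the counter
 * brackets a read-write lock hold so that a priority promotion taken while
 * holding the lock is undone when the last boosted lock is released:
 *
 *	set_thread_rwlock_boost();
 *	lck_rw_lock_shared(lck);
 *	...
 *	lck_rw_unlock_shared(lck);
 *	clear_thread_rwlock_boost();	// may call lck_rw_clear_promotion()
 */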
/*
 * XXX assuming current thread only, for now...
 */
void
thread_guard_violation(thread_t thread, unsigned type)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	/*
	 * Use the saved state area of the thread structure
	 * to store all info required to handle the AST when
	 * returning to userspace
	 */
	thread->guard_exc_info.type = type;
	thread_ast_set(thread, AST_GUARD);
	ast_propagate(thread->ast);
	splx(s);
}

/*
 *	guard_ast:
 *
 *	Handle AST_GUARD for a thread. This routine looks at the
 *	state saved in the thread structure to determine the cause
 *	of this exception. Based on this value, it invokes the
 *	appropriate routine which determines other exception related
 *	info and raises the exception.
 */
void
guard_ast(thread_t thread)
{
	if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT)
		mach_port_guard_ast(thread);
	else
		fd_guard_ast(thread);
}
static void
thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This thread is in danger of violating the CPU usage monitor.  Enable telemetry
		 * on the entire task so there are micro-stackshots available if and when
		 * EXC_RESOURCE is triggered.  We could have chosen to enable micro-stackshots
		 * for this thread only; but now that this task is suspect, knowing what all of
		 * its threads are up to will be useful.
		 */
		telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
#endif

	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
	}
}
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
{
	int          pid               = 0;
	task_t       task              = current_task();
	thread_t     thread            = current_thread();
	uint64_t     tid               = thread->thread_id;
	const char   *procname         = "unknown";
	time_value_t thread_total_time = {0, 0};
	time_value_t thread_system_time;
	time_value_t thread_user_time;
	int          action;
	uint8_t      percentage;
	uint32_t     usage_percent = 0;
	uint32_t     interval_sec;
	uint64_t     interval_ns;
	uint64_t     balance_ns;
	boolean_t    fatal = FALSE;
	boolean_t    send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
	kern_return_t kr;

#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info lei;

	assert(thread->t_threadledger != LEDGER_NULL);

	/*
	 * Extract the fatal bit and suspend the monitor (which clears the bit).
	 */
	task_lock(task);
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
		fatal = TRUE;
		send_exc_resource = TRUE;
	}
	/* Only one thread can be here at a time.  Whichever makes it through
	   first will successfully suspend the monitor and proceed to send the
	   notification.  Other threads will get an error trying to suspend the
	   monitor and give up on sending the notification.  In the first release,
	   the monitor won't be resumed for a number of seconds, but we may
	   eventually need to handle low-latency resume.
	*/
	kr = task_suspend_cpumon(task);
	task_unlock(task);
	if (kr == KERN_INVALID_ARGUMENT)
		return;

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (task->bsd_info != NULL) {
		procname = proc_name_address(task->bsd_info);
	}
#endif

	thread_get_cpulimit(&action, &percentage, &interval_ns);

	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);

	thread_read_times(thread, &thread_user_time, &thread_system_time);
	time_value_add(&thread_total_time, &thread_user_time);
	time_value_add(&thread_total_time, &thread_system_time);
	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);

	/* credit/debit/balance/limit are in absolute time units;
	   the refill info is in nanoseconds. */
	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	if (lei.lei_last_refill > 0) {
		usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
	}

	/* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
	printf("process %s[%d] thread %llu caught burning CPU! "
	       "It used more than %d%% CPU over %u seconds "
	       "(actual recent usage: %d%% over ~%llu seconds). "
	       "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
	       "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
	       "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
	       procname, pid, tid,
	       percentage, interval_sec,
	       usage_percent,
	       (lei.lei_last_refill + NSEC_PER_SEC / 2) / NSEC_PER_SEC,
	       thread_total_time.seconds, thread_total_time.microseconds,
	       thread_user_time.seconds, thread_user_time.microseconds,
	       thread_system_time.seconds, thread_system_time.microseconds,
	       lei.lei_balance, lei.lei_credit, lei.lei_debit,
	       lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
	       (fatal ? " [fatal violation]" : ""));

	/*
	   For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE.  Once
	   we have logging parity, we will stop sending EXC_RESOURCE (24508922).
	*/

	/* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
	lei.lei_balance = balance_ns;
	absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
	trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
	kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
	                             fatal ? kRNFatalLimitFlag : 0);
	if (kr) {
		printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	if (send_exc_resource) {
		if (disable_exc_resource) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			       "EXC_RESOURCE%s suppressed by a boot-arg\n",
			       procname, pid, tid, fatal ? " (and termination)" : "");
			return;
		}

		if (audio_active) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			       "EXC_RESOURCE & termination suppressed due to audio playback\n",
			       procname, pid, tid);
			return;
		}
	}

	if (send_exc_resource) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
		if (fatal) {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
		} else {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
		}
		EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
		exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	}
#endif /* EXC_RESOURCE_MONITORS */

	if (fatal) {
#if CONFIG_JETSAM
		jetsam_on_ledger_cpulimit_exceeded();
#else
		task_terminate_internal(task);
#endif
	}
}
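/*
 * Worked example (illustrative, not part of the original file) of the
 * usage_percent calculation above: the ledger balance is converted from
 * absolute time units to nanoseconds, then expressed as a percentage of the
 * last refill period. With a 10-second refill period and 8 seconds of CPU
 * consumed since the refill, usage_percent = (8e9 * 100) / 10e9 = 80.
 * example_usage_percent() is a hypothetical helper mirroring that math.
 */
#if 0
static uint32_t
example_usage_percent(uint64_t balance_ns, uint64_t last_refill_ns)
{
	/* mirrors the lei_last_refill > 0 guard above */
	if (last_refill_ns == 0)
		return 0;
	return (uint32_t)((balance_ns * 100ULL) / last_refill_ns);
}
#endif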
void thread_update_io_stats(thread_t thread, int size, int io_flags)
{
	int io_tier;

	if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
		return;

	if (io_flags & DKIO_READ) {
		UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
	}

	if (io_flags & DKIO_META) {
		UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
	}

	if (io_flags & DKIO_PAGING) {
		UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
	}

	io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
	assert(io_tier < IO_NUM_PRIORITIES);

	UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);

	/* Update Total I/O Counts */
	UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);

	if (!(io_flags & DKIO_READ)) {
		DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
		ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
	}
}
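/*
 * Usage sketch (illustrative, not part of the original file): how a block-I/O
 * completion path might account a 16KB tier-2 metadata read against the
 * current thread with thread_update_io_stats(). The DKIO_* flag composition
 * (the same flags decoded above) is the part being demonstrated; the call
 * site itself is hypothetical.
 */
#if 0
static void
example_account_metadata_read(void)
{
	/* read + metadata, priority tier 2 packed into the tier field */
	int io_flags = DKIO_READ | DKIO_META |
	    ((2 << DKIO_TIER_SHIFT) & DKIO_TIER_MASK);

	thread_update_io_stats(current_thread(), 16384, io_flags);
}
#endif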
void
init_thread_ledgers(void)
{
	ledger_template_t t;
	int idx;

	assert(thread_ledger_template == NULL);

	if ((t = ledger_template_create("Per-thread ledger")) == NULL)
		panic("couldn't create thread ledger template");

	if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
		panic("couldn't create cpu_time entry for thread ledger template");
	}

	if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
		panic("couldn't set thread ledger callback for cpu_time entry");
	}

	thread_ledgers.cpu_time = idx;

	thread_ledger_template = t;
}
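/*
 * Usage sketch (illustrative, not part of the original file): how a per-thread
 * ledger is minted from the template built above, mirroring what
 * thread_set_cpulimit() below does on first use. All identifiers are from the
 * surrounding file; only the wrapper function is hypothetical.
 */
#if 0
static ledger_t
example_make_thread_ledger(void)
{
	ledger_t l;

	/* entries start inactive; activate only the cpu_time entry */
	l = ledger_instantiate(thread_ledger_template,
	    LEDGER_CREATE_INACTIVE_ENTRIES);
	if (l != LEDGER_NULL)
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
	return l;
}
#endif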
/*
 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
 */
int
thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
{
	int64_t  abstime   = 0;
	uint64_t limittime = 0;
	thread_t thread    = current_thread();

	*percentage  = 0;
	*interval_ns = 0;
	*action      = 0;

	if (thread->t_threadledger == LEDGER_NULL) {
		/*
		 * This thread has no per-thread ledger, so it can't possibly
		 * have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
	ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);

	if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
		/*
		 * This thread's CPU time ledger has no period or limit; so it
		 * doesn't have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	/*
	 * This calculation is the converse to the one in thread_set_cpulimit().
	 */
	absolutetime_to_nanoseconds(abstime, &limittime);
	*percentage = (limittime * 100ULL) / *interval_ns;
	assert(*percentage <= 100);

	if (thread->options & TH_OPT_PROC_CPULIMIT) {
		assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_BLOCK;
	} else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
		assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_EXCEPTION;
	} else {
		*action = THREAD_CPULIMIT_DISABLE;
	}

	return (KERN_SUCCESS);
}
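/*
 * Usage sketch (illustrative, not part of the original file): reading back the
 * calling thread's CPU limit. A percentage/interval of 0/0 means no limit is
 * in force; the action value distinguishes which kind of limit is armed. The
 * wrapper function is hypothetical.
 */
#if 0
static boolean_t
example_has_cpulimit(void)
{
	int      action;
	uint8_t  percentage;
	uint64_t interval_ns;

	thread_get_cpulimit(&action, &percentage, &interval_ns);
	return (percentage != 0 && interval_ns != 0) ? TRUE : FALSE;
}
#endif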
/*
 * Set CPU usage limit on a thread.
 *
 * Calling with percentage of 0 will unset the limit for this thread.
 */
int
thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
{
	thread_t thread    = current_thread();
	ledger_t l;
	uint64_t limittime = 0;
	uint64_t abstime   = 0;

	assert(percentage <= 100);

	if (action == THREAD_CPULIMIT_DISABLE) {
		/*
		 * Remove CPU limit, if any exists.
		 */
		if (thread->t_threadledger != LEDGER_NULL) {
			l = thread->t_threadledger;
			ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
			ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
			thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
		}

		return (0);
	}

	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
		return (KERN_INVALID_ARGUMENT);
	}

	l = thread->t_threadledger;
	if (l == LEDGER_NULL) {
		/*
		 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
		 */
		if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
			return (KERN_RESOURCE_SHORTAGE);

		/*
		 * We are the first to create this thread's ledger, so only activate our entry.
		 */
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
		thread->t_threadledger = l;
	}

	/*
	 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
	 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	 */
	limittime = (interval_ns * percentage) / 100;
	nanoseconds_to_absolutetime(limittime, &abstime);
	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	/*
	 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
	 */
	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);

	if (action == THREAD_CPULIMIT_EXCEPTION) {
		/*
		 * We don't support programming the CPU usage monitor on a task if any of its
		 * threads have a per-thread blocking CPU limit configured.
		 */
		if (thread->options & TH_OPT_PRVT_CPULIMIT) {
			panic("CPU usage monitor activated, but blocking thread limit exists");
		}

		/*
		 * Make a note that this thread's CPU limit is being used for the task-wide CPU
		 * usage monitor. We don't have to arm the callback which will trigger the
		 * exception, because that was done for us in ledger_instantiate (because the
		 * ledger template used has a default callback).
		 */
		thread->options |= TH_OPT_PROC_CPULIMIT;
	} else {
		/*
		 * We deliberately override any CPU limit imposed by a task-wide limit (eg
		 * CPU usage monitor).
		 */
		thread->options &= ~TH_OPT_PROC_CPULIMIT;

		thread->options |= TH_OPT_PRVT_CPULIMIT;
		/* The per-thread ledger template by default has a callback for CPU time */
		ledger_disable_callback(l, thread_ledgers.cpu_time);
		ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	}

	return (0);
}
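/*
 * Usage sketch (illustrative, not part of the original file): arming a
 * blocking 50%-over-100ms limit on the calling thread. Internally this becomes
 * limittime = (100ms * 50) / 100 = 50ms of CPU per refill period, converted to
 * absolute time units by the code above. The wrapper is hypothetical.
 */
#if 0
static int
example_block_at_half_cpu(void)
{
	/* interval must be at least MINIMUM_CPULIMIT_INTERVAL_MS */
	return thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50,
	    100 * NSEC_PER_MSEC);
}
#endif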
static void
sched_call_null(
__unused	int		type,
__unused	thread_t	thread)
{
	;
}

void
thread_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	thread->sched_call = (call != NULL) ? call : sched_call_null;
}
sched_call_t
thread_disable_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	if (call) {
		spl_t s = splsched();
		thread_lock(thread);
		if (thread->sched_call == call) {
			thread->sched_call = sched_call_null;
		} else {
			call = NULL;
		}
		thread_unlock(thread);
		splx(s);
	}
	return call;
}
void
thread_reenable_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	if (call) {
		spl_t s = splsched();
		thread_lock(thread);
		thread_sched_call(thread, call);
		thread_unlock(thread);
		splx(s);
	}
}
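/*
 * Usage sketch (illustrative, not part of the original file): the
 * disable/reenable pair above is meant to bracket a region where sched_call
 * callbacks must not fire. thread_disable_sched_call() returns the call it
 * actually removed (or NULL), which is what gets handed back to
 * thread_reenable_sched_call(); passing NULL back is a safe no-op. The
 * bracketed region below is hypothetical.
 */
#if 0
static void
example_quiesce_sched_call(thread_t thread, sched_call_t call)
{
	sched_call_t disabled = thread_disable_sched_call(thread, call);

	/* ... work that must not race with the callback ... */

	thread_reenable_sched_call(thread, disabled);
}
#endif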
void
thread_static_param(
	thread_t	thread,
	boolean_t	state)
{
	thread_mtx_lock(thread);
	thread->static_param = state;
	thread_mtx_unlock(thread);
}
uint64_t
thread_tid(
	thread_t	thread)
{
	return (thread != THREAD_NULL ? thread->thread_id : 0);
}
uint16_t thread_set_tag(thread_t th, uint16_t tag) {
	return thread_set_tag_internal(th, tag);
}

uint16_t thread_get_tag(thread_t th) {
	return thread_get_tag_internal(th);
}
uint64_t
thread_dispatchqaddr(
	thread_t	thread)
{
	uint64_t	dispatchqueue_addr;
	uint64_t	thread_handle;

	if (thread == THREAD_NULL)
		return 0;

	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0)
		return 0;

	if (thread->inspection == TRUE)
		dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
	else if (thread->task->bsd_info)
		dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
	else
		dispatchqueue_addr = 0;

	return dispatchqueue_addr;
}
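/*
 * Note (illustrative, not part of the original file): the value computed above
 * is a user-space address, not kernel memory. It is the thread's TSD base
 * (machine.cthread_self) plus the per-process offset at which libdispatch
 * keeps the thread's queue pointer, so a debugger-style consumer would read
 * user memory at that address to find the queue. The wrapper below only shows
 * the zero-checking call pattern and is hypothetical.
 */
#if 0
static uint64_t
example_queue_slot_addr(thread_t thread)
{
	uint64_t addr = thread_dispatchqaddr(thread);

	/* 0 means no thread handle or no registered dispatch queue offset */
	return addr;
}
#endif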
/*
 * Export routines to other components for things that are done as macros
 * within the osfmk component.
 */

#undef thread_reference
void thread_reference(thread_t thread);
void
thread_reference(
	thread_t	thread)
{
	if (thread != THREAD_NULL)
		thread_reference_internal(thread);
}

#undef thread_should_halt

boolean_t
thread_should_halt(
	thread_t	th)
{
	return (thread_should_halt_fast(th));
}
/*
 * thread_set_voucher_name - reset the voucher port name bound to this thread
 *
 * Conditions:  nothing locked
 *
 * If we already converted the previous name to a cached voucher
 * reference, then we discard that reference here.  The next lookup
 * will cache it again.
 */
kern_return_t
thread_set_voucher_name(mach_port_name_t voucher_name)
{
	thread_t thread = current_thread();
	ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
	ipc_voucher_t voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (MACH_PORT_DEAD == voucher_name)
		return KERN_INVALID_RIGHT;

	/*
	 * aggressively convert to voucher reference
	 */
	if (MACH_PORT_VALID(voucher_name)) {
		new_voucher = convert_port_name_to_voucher(voucher_name);
		if (IPC_VOUCHER_NULL == new_voucher)
			return KERN_INVALID_ARGUMENT;
	}
#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(new_voucher);
#endif

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;
	thread->ith_voucher_name = voucher_name;
	thread->ith_voucher = new_voucher;
#ifdef CONFIG_BANK
	bank_swap_thread_bank_ledger(thread, bankledger);
#endif
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
				  (uintptr_t)thread_tid(thread),
				  (uintptr_t)voucher_name,
				  VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
				  1, 0);

	if (IPC_VOUCHER_NULL != voucher)
		ipc_voucher_release(voucher);

	return KERN_SUCCESS;
}
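/*
 * Usage sketch (illustrative, not part of the original file): unbinding the
 * calling thread's voucher by name. MACH_PORT_NULL is not MACH_PORT_VALID, so
 * no new voucher reference is taken, and any previously cached voucher
 * reference on the thread is released by the code above. The wrapper is
 * hypothetical.
 */
#if 0
static kern_return_t
example_unbind_voucher(void)
{
	return thread_set_voucher_name(MACH_PORT_NULL);
}
#endif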
/*
 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
 *
 * Conditions:  nothing locked
 *
 * A reference to the voucher may be lazily pending, if someone set the voucher name
 * but nobody has done a lookup yet.  In that case, we'll have to do the equivalent
 * lookup here.
 *
 * NOTE: At the moment, there is no distinction between the current and effective
 * vouchers because we only set them at the thread level currently.
 */
kern_return_t
thread_get_mach_voucher(
	thread_act_t		thread,
	mach_voucher_selector_t	__unused which,
	ipc_voucher_t		*voucherp)
{
	ipc_voucher_t		voucher;
	mach_port_name_t	voucher_name;

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;

	/* if already cached, just return a ref */
	if (IPC_VOUCHER_NULL != voucher) {
		ipc_voucher_reference(voucher);
		thread_mtx_unlock(thread);
		*voucherp = voucher;
		return KERN_SUCCESS;
	}

	voucher_name = thread->ith_voucher_name;

	/* convert the name to a port, then voucher reference */
	if (MACH_PORT_VALID(voucher_name)) {
		ipc_port_t port;

		if (KERN_SUCCESS !=
		    ipc_object_copyin(thread->task->itk_space, voucher_name,
				      MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread_mtx_unlock(thread);
			*voucherp = IPC_VOUCHER_NULL;
			return KERN_SUCCESS;
		}

		/* convert to a voucher ref to return, and cache a ref on thread */
		voucher = convert_port_to_voucher(port);
		ipc_voucher_reference(voucher);
		thread->ith_voucher = voucher;
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
					  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
					  (uintptr_t)thread_tid(thread),
					  (uintptr_t)voucher_name,
					  VM_KERNEL_ADDRPERM((uintptr_t)voucher),
					  2, 0);

		ipc_port_release_send(port);
	} else {
		thread_mtx_unlock(thread);
	}

	*voucherp = voucher;
	return KERN_SUCCESS;
}
/*
 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
 *
 * Conditions:  caller holds a reference on the voucher.
 *		nothing locked.
 *
 * We grab another reference to the voucher and bind it to the thread.  Any lazy
 * binding is erased.  The old voucher reference associated with the thread is
 * discarded.
 */
kern_return_t
thread_set_mach_voucher(
	thread_t		thread,
	ipc_voucher_t		voucher)
{
	ipc_voucher_t old_voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

	ipc_voucher_reference(voucher);
#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(voucher);
#endif
	thread_mtx_lock(thread);
	old_voucher = thread->ith_voucher;
	thread->ith_voucher = voucher;
	thread->ith_voucher_name = MACH_PORT_NULL;
#ifdef CONFIG_BANK
	bank_swap_thread_bank_ledger(thread, bankledger);
#endif
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
				  (uintptr_t)thread_tid(thread),
				  (uintptr_t)MACH_PORT_NULL,
				  VM_KERNEL_ADDRPERM((uintptr_t)voucher),
				  3, 0);

	ipc_voucher_release(old_voucher);

	return KERN_SUCCESS;
}
/*
 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
 *
 * Conditions:  caller holds a reference on the new and presumed old voucher(s).
 *		nothing locked.
 *
 * If the old voucher is still the same as passed in, replace it with new voucher
 * and discard the old (and the reference passed in).  Otherwise, discard the new
 * and return an updated old voucher.
 */
kern_return_t
thread_swap_mach_voucher(
	thread_t		thread,
	ipc_voucher_t		new_voucher,
	ipc_voucher_t		*in_out_old_voucher)
{
	mach_port_name_t old_voucher_name;
	ipc_voucher_t old_voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (THREAD_NULL == thread)
		return KERN_INVALID_TASK;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(new_voucher);
#endif

	thread_mtx_lock(thread);

	old_voucher = thread->ith_voucher;

	if (IPC_VOUCHER_NULL == old_voucher) {
		old_voucher_name = thread->ith_voucher_name;

		/* perform lazy binding if needed */
		if (MACH_PORT_VALID(old_voucher_name)) {
			old_voucher = convert_port_name_to_voucher(old_voucher_name);
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread->ith_voucher = old_voucher;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
						  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
						  (uintptr_t)thread_tid(thread),
						  (uintptr_t)old_voucher_name,
						  VM_KERNEL_ADDRPERM((uintptr_t)old_voucher),
						  4, 0);
		}
	}

	/* swap in new voucher, if old voucher matches the one supplied */
	if (old_voucher == *in_out_old_voucher) {
		ipc_voucher_reference(new_voucher);
		thread->ith_voucher = new_voucher;
		thread->ith_voucher_name = MACH_PORT_NULL;
#ifdef CONFIG_BANK
		bank_swap_thread_bank_ledger(thread, bankledger);
#endif
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
					  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
					  (uintptr_t)thread_tid(thread),
					  (uintptr_t)MACH_PORT_NULL,
					  VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
					  5, 0);

		ipc_voucher_release(old_voucher);

		*in_out_old_voucher = IPC_VOUCHER_NULL;
		return KERN_SUCCESS;
	}

	/* Otherwise, just return old voucher reference */
	ipc_voucher_reference(old_voucher);
	thread_mtx_unlock(thread);
	*in_out_old_voucher = old_voucher;
	return KERN_SUCCESS;
}
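/*
 * Usage sketch (illustrative, not part of the original file): the swap call
 * above behaves like a compare-and-swap on the thread's bound voucher. The
 * caller passes the voucher it believes is bound via in_out_old_voucher; on a
 * match the new voucher is installed and IPC_VOUCHER_NULL comes back, on a
 * mismatch nothing is installed and the live voucher comes back with a
 * reference for the caller to examine. The wrapper is hypothetical.
 */
#if 0
static boolean_t
example_try_swap_voucher(thread_t thread, ipc_voucher_t new_voucher,
    ipc_voucher_t expected)
{
	ipc_voucher_t old = expected;

	if (thread_swap_mach_voucher(thread, new_voucher, &old) != KERN_SUCCESS)
		return FALSE;

	if (old == IPC_VOUCHER_NULL)
		return TRUE;	/* swapped in */

	/* lost the race; 'old' holds a reference on the live voucher */
	ipc_voucher_release(old);
	return FALSE;
}
#endif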
/*
 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
 */
kern_return_t
thread_get_current_voucher_origin_pid(
	int32_t	*pid)
{
	uint32_t buf_size;
	kern_return_t kr;
	thread_t thread = current_thread();

	buf_size = sizeof(*pid);
	kr = mach_voucher_attr_command(thread->ith_voucher,
				       MACH_VOUCHER_ATTR_KEY_BANK,
				       BANK_ORIGINATOR_PID,
				       NULL,
				       0,
				       (mach_voucher_attr_content_t)pid,
				       &buf_size);

	return kr;
}
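/*
 * Usage sketch (illustrative, not part of the original file): the query above
 * is a thin wrapper over mach_voucher_attr_command() against the bank
 * attribute manager; BANK_ORIGINATOR_PID fills in the pid of the process that
 * created the bound voucher. The caller below is hypothetical.
 */
#if 0
static void
example_log_voucher_origin(void)
{
	int32_t pid = -1;

	if (thread_get_current_voucher_origin_pid(&pid) == KERN_SUCCESS)
		printf("current voucher originated in pid %d\n", pid);
}
#endif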
boolean_t
thread_has_thread_name(thread_t th)
{
	if ((th) && (th->uthread)) {
		return bsd_hasthreadname(th->uthread);
	}

	/*
	 * This is an odd case; clients may set the thread name based on the lack of
	 * a name, but in this context there is no uthread to attach the name to.
	 */
	return FALSE;
}

void
thread_set_thread_name(thread_t th, const char* name)
{
	if ((th) && (th->uthread) && name) {
		bsd_setthreadname(th->uthread, name);
	}
}
/*
 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
 */
void thread_enable_send_importance(thread_t thread, boolean_t enable)
{
	if (enable == TRUE)
		thread->options |= TH_OPT_SEND_IMPORTANCE;
	else
		thread->options &= ~TH_OPT_SEND_IMPORTANCE;
}
#if CONFIG_DTRACE
uint32_t dtrace_get_thread_predcache(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_predcache;
	else
		return 0;
}

int64_t dtrace_get_thread_vtime(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_vtime;
	else
		return 0;
}

int dtrace_get_thread_last_cpu_id(thread_t thread)
{
	if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
		return thread->last_processor->cpu_id;
	} else {
		return -1;
	}
}

int64_t dtrace_get_thread_tracing(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_tracing;
	else
		return 0;
}

boolean_t dtrace_get_thread_reentering(thread_t thread)
{
	if (thread != THREAD_NULL)
		return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
	else
		return FALSE;
}

vm_offset_t dtrace_get_kernel_stack(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->kernel_stack;
	else
		return 0;
}

int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
{
	if (thread != THREAD_NULL) {
		processor_t processor = current_processor();
		uint64_t    abstime   = mach_absolute_time();
		timer_t     timer;

		timer = PROCESSOR_DATA(processor, thread_timer);

		return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
		       (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
	} else
		return 0;
}

void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_predcache = predcache;
}

void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_vtime = vtime;
}

void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_tracing = accum;
}

void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
{
	if (thread != THREAD_NULL) {
		if (vbool)
			thread->options |= TH_OPT_DTRACE;
		else
			thread->options &= (~TH_OPT_DTRACE);
	}
}

vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
{
	vm_offset_t prev = 0;

	if (thread != THREAD_NULL) {
		prev = thread->recover;
		thread->recover = recover;
	}
	return prev;
}

void dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
			KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
			     task_pid(task));
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}

void
dtrace_thread_didexec(thread_t thread)
{
	thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
}
#endif /* CONFIG_DTRACE */