X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/b0d623f7f2ae71ed96e60569f61f9a9a27016e80..143464d58d2bd6378e74eec636961ceb0d32fb91:/osfmk/kern/thread.c diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index b33a7d2be..6f6fd6ff9 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -91,11 +91,14 @@ #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -112,6 +115,11 @@ #include #include #include +#include +#include +#if KPC +#include +#endif #include #include @@ -151,14 +159,39 @@ static void sched_call_null( #ifdef MACH_BSD extern void proc_exit(void *); extern uint64_t get_dispatchqueue_offset_from_proc(void *); +extern int proc_selfpid(void); +extern char * proc_name_address(void *p); #endif /* MACH_BSD */ +extern int disable_exc_resource; +extern int audio_active; extern int debug_task; int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */ int task_threadmax = CONFIG_THREAD_MAX; static uint64_t thread_unique_id = 0; +struct _thread_ledger_indices thread_ledgers = { -1 }; +static ledger_template_t thread_ledger_template = NULL; +void init_thread_ledgers(void); +int task_disable_cpumon(task_t task); + +/* + * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry. + * + * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user + * stacktraces, aka micro-stackshots) + */ +#define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70 + +int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */ +void __attribute__((noinline)) THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void); + +/* + * The smallest interval over which we support limiting CPU consumption is 1ms + */ +#define MINIMUM_CPULIMIT_INTERVAL_MS 1 + void thread_bootstrap(void) { @@ -182,8 +215,9 @@ thread_bootstrap(void) thread_template.parameter = NULL; thread_template.importance = 0; - thread_template.sched_mode = 0; - thread_template.safe_mode = 0; + thread_template.sched_mode = TH_MODE_NONE; + thread_template.sched_flags = 0; + thread_template.saved_mode = TH_MODE_NONE; thread_template.safe_release = 0; thread_template.priority = 0; @@ -193,19 +227,24 @@ thread_bootstrap(void) thread_template.promotions = 0; thread_template.pending_promoter_index = 0; thread_template.pending_promoter[0] = - thread_template.pending_promoter[1] = NULL; + thread_template.pending_promoter[1] = NULL; + thread_template.rwlock_count = 0; thread_template.realtime.deadline = UINT64_MAX; thread_template.current_quantum = 0; + thread_template.last_run_time = 0; + thread_template.last_quantum_refill_time = 0; thread_template.computation_metered = 0; thread_template.computation_epoch = 0; +#if defined(CONFIG_SCHED_TRADITIONAL) thread_template.sched_stamp = 0; - thread_template.sched_usage = 0; thread_template.pri_shift = INT8_MAX; + thread_template.sched_usage = 0; thread_template.cpu_usage = thread_template.cpu_delta = 0; +#endif thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0; thread_template.bound_processor = PROCESSOR_NULL; @@ -241,12 +280,35 @@ thread_bootstrap(void) thread_template.t_dtrace_tracing = 0; #endif /* CONFIG_DTRACE */ +#if KPC + thread_template.kpc_buf = NULL; +#endif + thread_template.t_chud = 0; thread_template.t_page_creation_count = 0; thread_template.t_page_creation_time = 0; thread_template.affinity_set = NULL; + thread_template.syscalls_unix = 0; + thread_template.syscalls_mach = 0; + + thread_template.t_ledger = LEDGER_NULL; + thread_template.t_threadledger = LEDGER_NULL; + + thread_template.requested_policy = default_task_requested_policy; + thread_template.effective_policy = default_task_effective_policy; + thread_template.pended_policy = default_task_pended_policy; + + thread_template.iotier_override = THROTTLE_LEVEL_NONE; + + thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0; + + thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0; + thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0; + + thread_template.thread_tag = 0; + init_thread = thread_template; machine_set_current_thread(&init_thread); } @@ -259,7 +321,7 @@ thread_init(void) thread_max * sizeof(struct thread), THREAD_CHUNK * sizeof(struct thread), "threads"); - + lck_grp_attr_setdefault(&thread_lck_grp_attr); lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr); lck_attr_setdefault(&thread_lck_attr); @@ -271,6 +333,13 @@ thread_init(void) * per-thread structures necessary. */ machine_thread_init(); + + if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct, + sizeof (cpumon_ustackshots_trigger_pct))) { + cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT; + } + + init_thread_ledgers(); } static void @@ -287,16 +356,17 @@ void thread_terminate_self(void) { thread_t thread = current_thread(); + task_t task; spl_t s; int threadcnt; + pal_thread_terminate_self(thread); + DTRACE_PROC(lwp__exit); thread_mtx_lock(thread); - ulock_release_all(thread); - ipc_thread_disable(thread); thread_mtx_unlock(thread); @@ -308,8 +378,8 @@ thread_terminate_self(void) * Cancel priority depression, wait for concurrent expirations * on other processors. */ - if (thread->sched_mode & TH_MODE_ISDEPRESSED) { - thread->sched_mode &= ~TH_MODE_ISDEPRESSED; + if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { + thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK; if (timer_call_cancel(&thread->depress_timer)) thread->depress_timer_active--; @@ -332,6 +402,7 @@ thread_terminate_self(void) thread_policy_reset(thread); + task = thread->task; uthread_cleanup(task, thread->uthread, task->bsd_info); threadcnt = hw_atomic_sub(&task->active_thread_count, 1); @@ -373,8 +444,7 @@ thread_terminate_self(void) * If there is a reserved stack, release it. */ if (thread->reserved_stack != 0) { - if (thread->reserved_stack != thread->kernel_stack) - stack_free_stack(thread->reserved_stack); + stack_free_reserved(thread); thread->reserved_stack = 0; } @@ -384,6 +454,7 @@ thread_terminate_self(void) thread->state |= TH_TERMINATE; thread_mark_wait_locked(thread, THREAD_UNINT); assert(thread->promotions == 0); + assert(thread->rwlock_count == 0); thread_unlock(thread); /* splsched */ @@ -403,6 +474,14 @@ thread_deallocate( if (thread_deallocate_internal(thread) > 0) return; + if(!(thread->state & TH_TERMINATE2)) + panic("thread_deallocate: thread not properly terminated\n"); + +#if KPC + kpc_thread_destroy(thread); +#endif + + ipc_thread_terminate(thread); task = thread->task; @@ -416,7 +495,10 @@ thread_deallocate( } #endif /* MACH_BSD */ - task_deallocate(task); + if (thread->t_ledger) + ledger_dereference(thread->t_ledger); + if (thread->t_threadledger) + ledger_dereference(thread->t_threadledger); if (thread->kernel_stack != 0) stack_free(thread); @@ -424,6 +506,8 @@ thread_deallocate( lck_mtx_destroy(&thread->mutex, &thread_lck_grp); machine_thread_destroy(thread); + task_deallocate(task); + zfree(thread_zone, thread); } @@ -435,8 +519,11 @@ thread_deallocate( static void thread_terminate_daemon(void) { - thread_t thread; - task_t task; + thread_t self, thread; + task_t task; + + self = current_thread(); + self->options |= TH_OPT_SYSTEM_CRITICAL; (void)splsched(); simple_lock(&thread_terminate_lock); @@ -449,12 +536,21 @@ thread_terminate_daemon(void) task_lock(task); task->total_user_time += timer_grab(&thread->user_timer); - task->total_system_time += timer_grab(&thread->system_timer); + if (thread->precise_user_kernel_time) { + task->total_system_time += timer_grab(&thread->system_timer); + } else { + task->total_user_time += timer_grab(&thread->system_timer); + } task->c_switch += thread->c_switch; task->p_switch += thread->p_switch; task->ps_switch += thread->ps_switch; + task->syscalls_unix += thread->syscalls_unix; + task->syscalls_mach += thread->syscalls_mach; + + task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1; + task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2; queue_remove(&task->threads, thread, thread_t, task_threads); task->thread_count--; @@ -482,6 +578,7 @@ thread_terminate_daemon(void) simple_unlock(&thread_terminate_lock); /* splsched */ + self->options &= ~TH_OPT_SYSTEM_CRITICAL; thread_block((thread_continue_t)thread_terminate_daemon); /*NOTREACHED*/ } @@ -514,25 +611,29 @@ static void thread_stack_daemon(void) { thread_t thread; + spl_t s; + s = splsched(); simple_lock(&thread_stack_lock); while ((thread = (thread_t)dequeue_head(&thread_stack_queue)) != THREAD_NULL) { simple_unlock(&thread_stack_lock); + splx(s); + /* allocate stack with interrupts enabled so that we can call into VM */ stack_alloc(thread); - (void)splsched(); + s = splsched(); thread_lock(thread); thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); thread_unlock(thread); - (void)spllo(); simple_lock(&thread_stack_lock); } assert_wait((event_t)&thread_stack_queue, THREAD_UNINT); simple_unlock(&thread_stack_lock); + splx(s); thread_block((thread_continue_t)thread_stack_daemon); /*NOTREACHED*/ @@ -560,7 +661,7 @@ void thread_daemon_init(void) { kern_return_t result; - thread_t thread; + thread_t thread = NULL; simple_lock_init(&thread_terminate_lock, 0); queue_init(&thread_terminate_queue); @@ -635,7 +736,7 @@ thread_create_internal( return (KERN_FAILURE); } - new_thread->task = parent_task; + new_thread->task = parent_task; thread_lock_init(new_thread); wake_lock_init(new_thread); @@ -643,7 +744,6 @@ thread_create_internal( lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr); ipc_thread_init(new_thread); - queue_init(&new_thread->held_ulocks); new_thread->continuation = continuation; @@ -682,6 +782,18 @@ thread_create_internal( task_reference_internal(parent_task); + if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { + /* + * This task has a per-thread CPU limit; make sure this new thread + * gets its limit set too, before it gets out of the kernel. + */ + set_astledger(new_thread); + } + new_thread->t_threadledger = LEDGER_NULL; /* per thread ledger is not inherited */ + new_thread->t_ledger = new_thread->task->ledger; + if (new_thread->t_ledger) + ledger_reference(new_thread->t_ledger); + /* Cache the task's map */ new_thread->map = parent_task->map; @@ -709,20 +821,30 @@ thread_create_internal( new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ? THREAD_PMC_FLAG : 0U; #endif +#if KPC + kpc_thread_create(new_thread); +#endif + + /* Only need to update policies pushed from task to thread */ + new_thread->requested_policy.bg_iotier = parent_task->effective_policy.bg_iotier; + new_thread->requested_policy.terminated = parent_task->effective_policy.terminated; /* Set the thread's scheduling parameters */ - if (parent_task != kernel_task) - new_thread->sched_mode |= TH_MODE_TIMESHARE; + new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task); + new_thread->sched_flags = 0; new_thread->max_priority = parent_task->max_priority; new_thread->task_priority = parent_task->priority; new_thread->priority = (priority < 0)? parent_task->priority: priority; if (new_thread->priority > new_thread->max_priority) new_thread->priority = new_thread->max_priority; - new_thread->importance = - new_thread->priority - new_thread->task_priority; + new_thread->importance = new_thread->priority - new_thread->task_priority; + new_thread->saved_importance = new_thread->importance; + +#if defined(CONFIG_SCHED_TRADITIONAL) new_thread->sched_stamp = sched_tick; new_thread->pri_shift = sched_pri_shift; - compute_priority(new_thread, FALSE); +#endif + SCHED(compute_priority)(new_thread, FALSE); new_thread->active = TRUE; @@ -733,16 +855,16 @@ thread_create_internal( kdbg_trace_data(parent_task->bsd_info, &dbg_arg2); - KERNEL_DEBUG_CONSTANT( - TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE, - (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE, + (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0); kdbg_trace_string(parent_task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); - KERNEL_DEBUG_CONSTANT( - TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE, + dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); } DTRACE_PROC1(lwp__create, thread_t, *out_thread); @@ -750,10 +872,11 @@ thread_create_internal( return (KERN_SUCCESS); } -kern_return_t -thread_create( +static kern_return_t +thread_create_internal2( task_t task, - thread_t *new_thread) + thread_t *new_thread, + boolean_t from_user) { kern_return_t result; thread_t thread; @@ -770,6 +893,9 @@ thread_create( if (task->suspend_count > 0) thread_hold(thread); + if (from_user) + extmod_statistics_incr_thread_create(task); + task_unlock(task); lck_mtx_unlock(&tasks_threads_lock); @@ -778,13 +904,36 @@ thread_create( return (KERN_SUCCESS); } +/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */ kern_return_t -thread_create_running( +thread_create( + task_t task, + thread_t *new_thread); + +kern_return_t +thread_create( + task_t task, + thread_t *new_thread) +{ + return thread_create_internal2(task, new_thread, FALSE); +} + +kern_return_t +thread_create_from_user( + task_t task, + thread_t *new_thread) +{ + return thread_create_internal2(task, new_thread, TRUE); +} + +static kern_return_t +thread_create_running_internal2( register task_t task, int flavor, thread_state_t new_state, mach_msg_type_number_t new_state_count, - thread_t *new_thread) + thread_t *new_thread, + boolean_t from_user) { register kern_return_t result; thread_t thread; @@ -811,6 +960,9 @@ thread_create_running( thread_start_internal(thread); thread_mtx_unlock(thread); + if (from_user) + extmod_statistics_incr_thread_create(task); + task_unlock(task); lck_mtx_unlock(&tasks_threads_lock); @@ -819,9 +971,45 @@ thread_create_running( return (result); } +/* Prototype, see justification above */ +kern_return_t +thread_create_running( + register task_t task, + int flavor, + thread_state_t new_state, + mach_msg_type_number_t new_state_count, + thread_t *new_thread); + +kern_return_t +thread_create_running( + register task_t task, + int flavor, + thread_state_t new_state, + mach_msg_type_number_t new_state_count, + thread_t *new_thread) +{ + return thread_create_running_internal2( + task, flavor, new_state, new_state_count, + new_thread, FALSE); +} + +kern_return_t +thread_create_running_from_user( + register task_t task, + int flavor, + thread_state_t new_state, + mach_msg_type_number_t new_state_count, + thread_t *new_thread) +{ + return thread_create_running_internal2( + task, flavor, new_state, new_state_count, + new_thread, TRUE); +} + kern_return_t thread_create_workq( task_t task, + thread_continue_t thread_return, thread_t *new_thread) { kern_return_t result; @@ -830,8 +1018,7 @@ thread_create_workq( if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); - result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, - TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread); + result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread); if (result != KERN_SUCCESS) return (result); @@ -874,9 +1061,6 @@ kernel_thread_create( stack_alloc(thread); assert(thread->kernel_stack != 0); -#if CONFIG_EMBEDDED - if (priority > BASEPRI_KERNEL) -#endif thread->reserved_stack = thread->kernel_stack; thread->parameter = parameter; @@ -920,29 +1104,6 @@ kernel_thread_start( return kernel_thread_start_priority(continuation, parameter, -1, new_thread); } -#ifndef __LP64__ - -thread_t -kernel_thread( - task_t task, - void (*start)(void)) -{ - kern_return_t result; - thread_t thread; - - if (task != kernel_task) - panic("kernel_thread"); - - result = kernel_thread_start_priority((thread_continue_t)start, NULL, -1, &thread); - if (result != KERN_SUCCESS) - return (THREAD_NULL); - - thread_deallocate(thread); - - return (thread); -} - -#endif /* __LP64__ */ kern_return_t thread_info_internal( @@ -976,8 +1137,8 @@ thread_info_internal( /* * Update lazy-evaluated scheduler info because someone wants it. */ - if (thread->sched_stamp != sched_tick) - update_priority(thread); + if (SCHED(can_update_priority)(thread)) + SCHED(update_priority)(thread); basic_info->sleep_time = 0; @@ -986,18 +1147,23 @@ thread_info_internal( * then for 5/8 ageing. The correction factor [3/5] is * (1/(5/8) - 1). */ - basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage - * TH_USAGE_SCALE) / sched_tick_interval); - basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5; - + basic_info->cpu_usage = 0; +#if defined(CONFIG_SCHED_TRADITIONAL) + if (sched_tick_interval) { + basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage + * TH_USAGE_SCALE) / sched_tick_interval); + basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5; + } +#endif + if (basic_info->cpu_usage > TH_USAGE_SCALE) basic_info->cpu_usage = TH_USAGE_SCALE; - basic_info->policy = ((thread->sched_mode & TH_MODE_TIMESHARE)? + basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)? POLICY_TIMESHARE: POLICY_RR); flags = 0; - if (thread->bound_processor != PROCESSOR_NULL && thread->bound_processor->idle_thread == thread) + if (thread->options & TH_OPT_IDLE_THREAD) flags |= TH_FLAGS_IDLE; if (!thread->kernel_stack) @@ -1044,11 +1210,7 @@ thread_info_internal( thread_lock(thread); identifier_info->thread_id = thread->thread_id; -#if defined(__ppc__) || defined(__arm__) identifier_info->thread_handle = thread->machine.cthread_self; -#else - identifier_info->thread_handle = thread->machine.pcb->cthread_self; -#endif if(thread->task->bsd_info) { identifier_info->dispatch_qaddr = identifier_info->thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info); } else { @@ -1073,14 +1235,14 @@ thread_info_internal( s = splsched(); thread_lock(thread); - if (!(thread->sched_mode & TH_MODE_TIMESHARE)) { + if (thread->sched_mode != TH_MODE_TIMESHARE) { thread_unlock(thread); splx(s); return (KERN_INVALID_POLICY); } - ts_info->depressed = (thread->sched_mode & TH_MODE_ISDEPRESSED) != 0; + ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0; if (ts_info->depressed) { ts_info->base_priority = DEPRESSPRI; ts_info->depress_priority = thread->priority; @@ -1110,7 +1272,9 @@ thread_info_internal( else if (flavor == THREAD_SCHED_RR_INFO) { policy_rr_info_t rr_info; - + uint32_t quantum_time; + uint64_t quantum_ns; + if (*thread_info_count < POLICY_RR_INFO_COUNT) return (KERN_INVALID_ARGUMENT); @@ -1119,14 +1283,14 @@ thread_info_internal( s = splsched(); thread_lock(thread); - if (thread->sched_mode & TH_MODE_TIMESHARE) { + if (thread->sched_mode == TH_MODE_TIMESHARE) { thread_unlock(thread); splx(s); return (KERN_INVALID_POLICY); } - rr_info->depressed = (thread->sched_mode & TH_MODE_ISDEPRESSED) != 0; + rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0; if (rr_info->depressed) { rr_info->base_priority = DEPRESSPRI; rr_info->depress_priority = thread->priority; @@ -1136,8 +1300,11 @@ thread_info_internal( rr_info->depress_priority = -1; } + quantum_time = SCHED(initial_quantum_size)(THREAD_NULL); + absolutetime_to_nanoseconds(quantum_time, &quantum_ns); + rr_info->max_priority = thread->max_priority; - rr_info->quantum = std_quantum_us / 1000; + rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000); thread_unlock(thread); splx(s); @@ -1158,14 +1325,29 @@ thread_read_times( { clock_sec_t secs; clock_usec_t usecs; + uint64_t tval_user, tval_system; - absolutetime_to_microtime(timer_grab(&thread->user_timer), &secs, &usecs); - user_time->seconds = (typeof(user_time->seconds))secs; - user_time->microseconds = usecs; + tval_user = timer_grab(&thread->user_timer); + tval_system = timer_grab(&thread->system_timer); - absolutetime_to_microtime(timer_grab(&thread->system_timer), &secs, &usecs); - system_time->seconds = (typeof(system_time->seconds))secs; - system_time->microseconds = usecs; + if (thread->precise_user_kernel_time) { + absolutetime_to_microtime(tval_user, &secs, &usecs); + user_time->seconds = (typeof(user_time->seconds))secs; + user_time->microseconds = usecs; + + absolutetime_to_microtime(tval_system, &secs, &usecs); + system_time->seconds = (typeof(system_time->seconds))secs; + system_time->microseconds = usecs; + } else { + /* system_timer may represent either sys or user */ + tval_user += tval_system; + absolutetime_to_microtime(tval_user, &secs, &usecs); + user_time->seconds = (typeof(user_time->seconds))secs; + user_time->microseconds = usecs; + + system_time->seconds = 0; + system_time->microseconds = 0; + } } kern_return_t @@ -1257,6 +1439,359 @@ thread_wire( return (thread_wire_internal(host_priv, thread, wired, NULL)); } + +/* + * XXX assuming current thread only, for now... + */ +void +thread_guard_violation(thread_t thread, unsigned type) +{ + assert(thread == current_thread()); + + spl_t s = splsched(); + /* + * Use the saved state area of the thread structure + * to store all info required to handle the AST when + * returning to userspace + */ + thread->guard_exc_info.type = type; + thread_ast_set(thread, AST_GUARD); + ast_propagate(thread->ast); + + splx(s); +} + +/* + * guard_ast: + * + * Handle AST_GUARD for a thread. This routine looks at the + * state saved in the thread structure to determine the cause + * of this exception. Based on this value, it invokes the + * appropriate routine which determines other exception related + * info and raises the exception. + */ +void +guard_ast(thread_t thread) +{ + if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT) + mach_port_guard_ast(thread); + else + fd_guard_ast(thread); +} + +static void +thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1) +{ + if (warning == LEDGER_WARNING_ROSE_ABOVE) { +#if CONFIG_TELEMETRY + /* + * This thread is in danger of violating the CPU usage monitor. Enable telemetry + * on the entire task so there are micro-stackshots available if and when + * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots + * for this thread only; but now that this task is suspect, knowing what all of + * its threads are up to will be useful. + */ + telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1); +#endif + return; + } + +#if CONFIG_TELEMETRY + /* + * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or + * exceeded the limit, turn telemetry off for the task. + */ + telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0); +#endif + + if (warning == 0) { + THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(); + } +} + +void __attribute__((noinline)) +THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void) +{ + int pid = 0; + task_t task = current_task(); + thread_t thread = current_thread(); + uint64_t tid = thread->thread_id; + char *procname = (char *) "unknown"; + time_value_t thread_total_time = {0, 0}; + time_value_t thread_system_time; + time_value_t thread_user_time; + int action; + uint8_t percentage; + uint32_t limit_percent; + uint32_t usage_percent; + uint32_t interval_sec; + uint64_t interval_ns; + uint64_t balance_ns; + boolean_t fatal = FALSE; + + mach_exception_data_type_t code[EXCEPTION_CODE_MAX]; + struct ledger_entry_info lei; + + assert(thread->t_threadledger != LEDGER_NULL); + + /* + * Now that a thread has tripped the monitor, disable it for the entire task. + */ + task_lock(task); + if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) { + /* + * The CPU usage monitor has been disabled on our task, so some other + * thread must have gotten here first. We only send one exception per + * task lifetime, so there's nothing left for us to do here. + */ + task_unlock(task); + return; + } + if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) { + fatal = TRUE; + } + task_disable_cpumon(task); + task_unlock(task); + +#ifdef MACH_BSD + pid = proc_selfpid(); + if (task->bsd_info != NULL) + procname = proc_name_address(task->bsd_info); +#endif + + thread_get_cpulimit(&action, &percentage, &interval_ns); + + interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC); + + thread_read_times(thread, &thread_user_time, &thread_system_time); + time_value_add(&thread_total_time, &thread_user_time); + time_value_add(&thread_total_time, &thread_system_time); + + ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei); + + absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns); + usage_percent = (uint32_t) ((balance_ns * 100ULL) / lei.lei_last_refill); + + /* Show refill period in the same units as balance, limit, etc */ + nanoseconds_to_absolutetime(lei.lei_refill_period, &lei.lei_refill_period); + + limit_percent = (uint32_t) ((lei.lei_limit * 100ULL) / lei.lei_refill_period); + + /* TODO: show task total runtime as well? see TASK_ABSOLUTETIME_INFO */ + + if (disable_exc_resource) { + printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE " + "supressed by a boot-arg\n", procname, pid, tid); + return; + } + + if (audio_active) { + printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE " + "supressed due to audio playback\n", procname, pid, tid); + return; + } + printf("process %s[%d] thread %llu caught burning CPU! " + "It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. " + "thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) " + "ledger info: balance: %lld credit: %lld debit: %lld limit: %llu (%d%%) " + "period: %llu time since last refill (ns): %llu \n", + procname, pid, tid, + percentage, usage_percent, interval_sec, + thread_total_time.seconds, thread_total_time.microseconds, + thread_user_time.seconds, thread_user_time.microseconds, + thread_system_time.seconds, thread_system_time.microseconds, + lei.lei_balance, + lei.lei_credit, lei.lei_debit, + lei.lei_limit, limit_percent, + lei.lei_refill_period, lei.lei_last_refill); + + + code[0] = code[1] = 0; + EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU); + EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR); + EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec); + EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], limit_percent); + EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent); + exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); + + if (fatal) { + task_terminate_internal(task); + } +} + +void +init_thread_ledgers(void) { + ledger_template_t t; + int idx; + + assert(thread_ledger_template == NULL); + + if ((t = ledger_template_create("Per-thread ledger")) == NULL) + panic("couldn't create thread ledger template"); + + if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) { + panic("couldn't create cpu_time entry for thread ledger template"); + } + + if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) { + panic("couldn't set thread ledger callback for cpu_time entry"); + } + + thread_ledgers.cpu_time = idx; + thread_ledger_template = t; +} + +/* + * Returns currently applied CPU usage limit, or 0/0 if none is applied. + */ +int +thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns) +{ + int64_t abstime = 0; + uint64_t limittime = 0; + thread_t thread = current_thread(); + + *percentage = 0; + *interval_ns = 0; + *action = 0; + + if (thread->t_threadledger == LEDGER_NULL) { + /* + * This thread has no per-thread ledger, so it can't possibly + * have a CPU limit applied. + */ + return (KERN_SUCCESS); + } + + ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns); + ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime); + + if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) { + /* + * This thread's CPU time ledger has no period or limit; so it + * doesn't have a CPU limit applied. + */ + return (KERN_SUCCESS); + } + + /* + * This calculation is the converse to the one in thread_set_cpulimit(). + */ + absolutetime_to_nanoseconds(abstime, &limittime); + *percentage = (limittime * 100ULL) / *interval_ns; + assert(*percentage <= 100); + + if (thread->options & TH_OPT_PROC_CPULIMIT) { + assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0); + + *action = THREAD_CPULIMIT_BLOCK; + } else if (thread->options & TH_OPT_PRVT_CPULIMIT) { + assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0); + + *action = THREAD_CPULIMIT_EXCEPTION; + } else { + *action = THREAD_CPULIMIT_DISABLE; + } + + return (KERN_SUCCESS); +} + +/* + * Set CPU usage limit on a thread. + * + * Calling with percentage of 0 will unset the limit for this thread. + */ +int +thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) +{ + thread_t thread = current_thread(); + ledger_t l; + uint64_t limittime = 0; + uint64_t abstime = 0; + + assert(percentage <= 100); + + if (action == THREAD_CPULIMIT_DISABLE) { + /* + * Remove CPU limit, if any exists. + */ + if (thread->t_threadledger != LEDGER_NULL) { + l = thread->t_threadledger; + /* + * The only way to get a per-thread ledger is via CPU limits. + */ + assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT)); + thread->t_threadledger = NULL; + ledger_dereference(l); + thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT); + } + + return (0); + } + + if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) { + return (KERN_INVALID_ARGUMENT); + } + + l = thread->t_threadledger; + if (l == LEDGER_NULL) { + /* + * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active. + */ + if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL) + return (KERN_RESOURCE_SHORTAGE); + + /* + * We are the first to create this thread's ledger, so only activate our entry. + */ + ledger_entry_setactive(l, thread_ledgers.cpu_time); + thread->t_threadledger = l; + } + + /* + * The limit is specified as a percentage of CPU over an interval in nanoseconds. + * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit. + */ + limittime = (interval_ns * percentage) / 100; + nanoseconds_to_absolutetime(limittime, &abstime); + ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct); + /* + * Refill the thread's allotted CPU time every interval_ns nanoseconds. + */ + ledger_set_period(l, thread_ledgers.cpu_time, interval_ns); + + if (action == THREAD_CPULIMIT_EXCEPTION) { + /* + * We don't support programming the CPU usage monitor on a task if any of its + * threads have a per-thread blocking CPU limit configured. + */ + if (thread->options & TH_OPT_PRVT_CPULIMIT) { + panic("CPU usage monitor activated, but blocking thread limit exists"); + } + + /* + * Make a note that this thread's CPU limit is being used for the task-wide CPU + * usage monitor. We don't have to arm the callback which will trigger the + * exception, because that was done for us in ledger_instantiate (because the + * ledger template used has a default callback). + */ + thread->options |= TH_OPT_PROC_CPULIMIT; + } else { + /* + * We deliberately override any CPU limit imposed by a task-wide limit (eg + * CPU usage monitor). + */ + thread->options &= ~TH_OPT_PROC_CPULIMIT; + + thread->options |= TH_OPT_PRVT_CPULIMIT; + /* The per-thread ledger template by default has a callback for CPU time */ + ledger_disable_callback(l, thread_ledgers.cpu_time); + ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK); + } + + return (0); +} + int split_funnel_off = 0; lck_grp_t *funnel_lck_grp = LCK_GRP_NULL; lck_grp_attr_t *funnel_lck_grp_attr; @@ -1407,6 +1942,13 @@ thread_tid( return (thread != THREAD_NULL? thread->thread_id: 0); } +uint16_t thread_set_tag(thread_t th, uint16_t tag) { + return thread_set_tag_internal(th, tag); +} +uint16_t thread_get_tag(thread_t th) { + return thread_get_tag_internal(th); +} + uint64_t thread_dispatchqaddr( thread_t thread) @@ -1415,11 +1957,7 @@ thread_dispatchqaddr( uint64_t thread_handle = 0; if (thread != THREAD_NULL) { -#if defined(__ppc__) || defined(__arm__) thread_handle = thread->machine.cthread_self; -#else - thread_handle = thread->machine.pcb->cthread_self; -#endif if (thread->task->bsd_info) dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info); @@ -1495,12 +2033,6 @@ vm_offset_t dtrace_get_kernel_stack(thread_t thread) int64_t dtrace_calc_thread_recent_vtime(thread_t thread) { -#if STAT_TIME - if (thread != THREAD_NULL) { - return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)); - } else - return 0; -#else if (thread != THREAD_NULL) { processor_t processor = current_processor(); uint64_t abstime = mach_absolute_time(); @@ -1512,7 +2044,6 @@ int64_t dtrace_calc_thread_recent_vtime(thread_t thread) (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */ } else return 0; -#endif } void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache) @@ -1557,10 +2088,22 @@ vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover) void dtrace_thread_bootstrap(void) { task_t task = current_task(); - if(task->thread_count == 1) { + + if (task->thread_count == 1) { + thread_t thread = current_thread(); + if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) { + thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS; + DTRACE_PROC(exec__success); + } DTRACE_PROC(start); } DTRACE_PROC(lwp__start); } + +void +dtrace_thread_didexec(thread_t thread) +{ + thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS; +} #endif /* CONFIG_DTRACE */