X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4b17d6b6e417f714551ec129064745ea9919780e..143464d58d2bd6378e74eec636961ceb0d32fb91:/osfmk/kern/thread.c diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 92840195a..6f6fd6ff9 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -115,6 +115,11 @@ #include #include #include +#include +#include +#if KPC +#include +#endif #include #include @@ -154,8 +159,12 @@ static void sched_call_null( #ifdef MACH_BSD extern void proc_exit(void *); extern uint64_t get_dispatchqueue_offset_from_proc(void *); +extern int proc_selfpid(void); +extern char * proc_name_address(void *p); #endif /* MACH_BSD */ +extern int disable_exc_resource; +extern int audio_active; extern int debug_task; int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */ int task_threadmax = CONFIG_THREAD_MAX; @@ -165,6 +174,23 @@ static uint64_t thread_unique_id = 0; struct _thread_ledger_indices thread_ledgers = { -1 }; static ledger_template_t thread_ledger_template = NULL; void init_thread_ledgers(void); +int task_disable_cpumon(task_t task); + +/* + * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry. + * + * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user + * stacktraces, aka micro-stackshots) + */ +#define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70 + +int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */ +void __attribute__((noinline)) THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void); + +/* + * The smallest interval over which we support limiting CPU consumption is 1ms + */ +#define MINIMUM_CPULIMIT_INTERVAL_MS 1 void thread_bootstrap(void) @@ -202,6 +228,7 @@ thread_bootstrap(void) thread_template.pending_promoter_index = 0; thread_template.pending_promoter[0] = thread_template.pending_promoter[1] = NULL; + thread_template.rwlock_count = 0; thread_template.realtime.deadline = UINT64_MAX; @@ -253,6 +280,10 @@ thread_bootstrap(void) thread_template.t_dtrace_tracing = 0; #endif /* CONFIG_DTRACE */ +#if KPC + thread_template.kpc_buf = NULL; +#endif + thread_template.t_chud = 0; thread_template.t_page_creation_count = 0; thread_template.t_page_creation_time = 0; @@ -265,14 +296,18 @@ thread_bootstrap(void) thread_template.t_ledger = LEDGER_NULL; thread_template.t_threadledger = LEDGER_NULL; - thread_template.appliedstate = default_task_null_policy; - thread_template.ext_appliedstate = default_task_null_policy; - thread_template.policystate = default_task_proc_policy; - thread_template.ext_policystate = default_task_proc_policy; -#if CONFIG_EMBEDDED - thread_template.taskwatch = NULL; - thread_template.saved_importance = 0; -#endif /* CONFIG_EMBEDDED */ + thread_template.requested_policy = default_task_requested_policy; + thread_template.effective_policy = default_task_effective_policy; + thread_template.pended_policy = default_task_pended_policy; + + thread_template.iotier_override = THROTTLE_LEVEL_NONE; + + thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0; + + thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0; + thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0; + + thread_template.thread_tag = 0; init_thread = thread_template; machine_set_current_thread(&init_thread); @@ -299,6 +334,11 @@ thread_init(void) */ machine_thread_init(); + if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct, + sizeof (cpumon_ustackshots_trigger_pct))) { + cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT; + } + init_thread_ledgers(); } @@ -327,8 +367,6 @@ thread_terminate_self(void) thread_mtx_lock(thread); - ulock_release_all(thread); - ipc_thread_disable(thread); thread_mtx_unlock(thread); @@ -364,9 +402,6 @@ thread_terminate_self(void) thread_policy_reset(thread); -#if CONFIG_EMBEDDED - thead_remove_taskwatch(thread); -#endif /* CONFIG_EMBEDDED */ task = thread->task; uthread_cleanup(task, thread->uthread, task->bsd_info); @@ -419,6 +454,7 @@ thread_terminate_self(void) thread->state |= TH_TERMINATE; thread_mark_wait_locked(thread, THREAD_UNINT); assert(thread->promotions == 0); + assert(thread->rwlock_count == 0); thread_unlock(thread); /* splsched */ @@ -438,6 +474,13 @@ thread_deallocate( if (thread_deallocate_internal(thread) > 0) return; + if(!(thread->state & TH_TERMINATE2)) + panic("thread_deallocate: thread not properly terminated\n"); + +#if KPC + kpc_thread_destroy(thread); +#endif + ipc_thread_terminate(thread); @@ -568,25 +611,29 @@ static void thread_stack_daemon(void) { thread_t thread; + spl_t s; + s = splsched(); simple_lock(&thread_stack_lock); while ((thread = (thread_t)dequeue_head(&thread_stack_queue)) != THREAD_NULL) { simple_unlock(&thread_stack_lock); + splx(s); + /* allocate stack with interrupts enabled so that we can call into VM */ stack_alloc(thread); - (void)splsched(); + s = splsched(); thread_lock(thread); thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); thread_unlock(thread); - (void)spllo(); simple_lock(&thread_stack_lock); } assert_wait((event_t)&thread_stack_queue, THREAD_UNINT); simple_unlock(&thread_stack_lock); + splx(s); thread_block((thread_continue_t)thread_stack_daemon); /*NOTREACHED*/ @@ -697,7 +744,6 @@ thread_create_internal( lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr); ipc_thread_init(new_thread); - queue_init(&new_thread->held_ulocks); new_thread->continuation = continuation; @@ -775,6 +821,13 @@ thread_create_internal( new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ? THREAD_PMC_FLAG : 0U; #endif +#if KPC + kpc_thread_create(new_thread); +#endif + + /* Only need to update policies pushed from task to thread */ + new_thread->requested_policy.bg_iotier = parent_task->effective_policy.bg_iotier; + new_thread->requested_policy.terminated = parent_task->effective_policy.terminated; /* Set the thread's scheduling parameters */ new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task); @@ -784,27 +837,8 @@ thread_create_internal( new_thread->priority = (priority < 0)? parent_task->priority: priority; if (new_thread->priority > new_thread->max_priority) new_thread->priority = new_thread->max_priority; -#if CONFIG_EMBEDDED - if (new_thread->priority < MAXPRI_THROTTLE) { - new_thread->priority = MAXPRI_THROTTLE; - } -#endif /* CONFIG_EMBEDDED */ - new_thread->importance = - new_thread->priority - new_thread->task_priority; -#if CONFIG_EMBEDDED + new_thread->importance = new_thread->priority - new_thread->task_priority; new_thread->saved_importance = new_thread->importance; - /* apple ios daemon starts all threads in darwin background */ - if (parent_task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) { - /* Cannot use generic routines here so apply darwin bacground directly */ - new_thread->policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL; - /* set thread self backgrounding */ - new_thread->appliedstate.hw_bg = new_thread->policystate.hw_bg; - /* priority will get recomputed suitably bit later */ - new_thread->importance = INT_MIN; - /* to avoid changes to many pri compute routines, set the effect of those here */ - new_thread->priority = MAXPRI_THROTTLE; - } -#endif /* CONFIG_EMBEDDED */ #if defined(CONFIG_SCHED_TRADITIONAL) new_thread->sched_stamp = sched_tick; @@ -1027,9 +1061,6 @@ kernel_thread_create( stack_alloc(thread); assert(thread->kernel_stack != 0); -#if CONFIG_EMBEDDED - if (priority > BASEPRI_KERNEL) -#endif thread->reserved_stack = thread->kernel_stack; thread->parameter = parameter; @@ -1073,29 +1104,6 @@ kernel_thread_start( return kernel_thread_start_priority(continuation, parameter, -1, new_thread); } -#if defined(__i386__) - -thread_t -kernel_thread( - task_t task, - void (*start)(void)) -{ - kern_return_t result; - thread_t thread; - - if (task != kernel_task) - panic("kernel_thread"); - - result = kernel_thread_start_priority((thread_continue_t)start, NULL, -1, &thread); - if (result != KERN_SUCCESS) - return (THREAD_NULL); - - thread_deallocate(thread); - - return (thread); -} - -#endif /* defined(__i386__) */ kern_return_t thread_info_internal( @@ -1155,7 +1163,7 @@ thread_info_internal( POLICY_TIMESHARE: POLICY_RR); flags = 0; - if (thread->bound_processor != PROCESSOR_NULL && thread->bound_processor->idle_thread == thread) + if (thread->options & TH_OPT_IDLE_THREAD) flags |= TH_FLAGS_IDLE; if (!thread->kernel_stack) @@ -1431,24 +1439,184 @@ thread_wire( return (thread_wire_internal(host_priv, thread, wired, NULL)); } + +/* + * XXX assuming current thread only, for now... + */ +void +thread_guard_violation(thread_t thread, unsigned type) +{ + assert(thread == current_thread()); + + spl_t s = splsched(); + /* + * Use the saved state area of the thread structure + * to store all info required to handle the AST when + * returning to userspace + */ + thread->guard_exc_info.type = type; + thread_ast_set(thread, AST_GUARD); + ast_propagate(thread->ast); + + splx(s); +} + +/* + * guard_ast: + * + * Handle AST_GUARD for a thread. This routine looks at the + * state saved in the thread structure to determine the cause + * of this exception. Based on this value, it invokes the + * appropriate routine which determines other exception related + * info and raises the exception. + */ +void +guard_ast(thread_t thread) +{ + if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT) + mach_port_guard_ast(thread); + else + fd_guard_ast(thread); +} + static void -thread_resource_exception(const void *arg0, __unused const void *arg1) +thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1) { - thread_t thread = current_thread(); - int code = (int)((uintptr_t)arg0 & ((int)-1)); - + if (warning == LEDGER_WARNING_ROSE_ABOVE) { +#if CONFIG_TELEMETRY + /* + * This thread is in danger of violating the CPU usage monitor. Enable telemetry + * on the entire task so there are micro-stackshots available if and when + * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots + * for this thread only; but now that this task is suspect, knowing what all of + * its threads are up to will be useful. + */ + telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1); +#endif + return; + } + +#if CONFIG_TELEMETRY + /* + * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or + * exceeded the limit, turn telemetry off for the task. + */ + telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0); +#endif + + if (warning == 0) { + THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(); + } +} + +void __attribute__((noinline)) +THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void) +{ + int pid = 0; + task_t task = current_task(); + thread_t thread = current_thread(); + uint64_t tid = thread->thread_id; + char *procname = (char *) "unknown"; + time_value_t thread_total_time = {0, 0}; + time_value_t thread_system_time; + time_value_t thread_user_time; + int action; + uint8_t percentage; + uint32_t limit_percent; + uint32_t usage_percent; + uint32_t interval_sec; + uint64_t interval_ns; + uint64_t balance_ns; + boolean_t fatal = FALSE; + + mach_exception_data_type_t code[EXCEPTION_CODE_MAX]; + struct ledger_entry_info lei; + assert(thread->t_threadledger != LEDGER_NULL); /* - * Disable the exception notification so we don't overwhelm - * the listener with an endless stream of redundant exceptions. + * Now that a thread has tripped the monitor, disable it for the entire task. */ - ledger_set_action(thread->t_threadledger, thread_ledgers.cpu_time, - LEDGER_ACTION_IGNORE); - ledger_disable_callback(thread->t_threadledger, thread_ledgers.cpu_time); + task_lock(task); + if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) { + /* + * The CPU usage monitor has been disabled on our task, so some other + * thread must have gotten here first. We only send one exception per + * task lifetime, so there's nothing left for us to do here. + */ + task_unlock(task); + return; + } + if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) { + fatal = TRUE; + } + task_disable_cpumon(task); + task_unlock(task); + +#ifdef MACH_BSD + pid = proc_selfpid(); + if (task->bsd_info != NULL) + procname = proc_name_address(task->bsd_info); +#endif + + thread_get_cpulimit(&action, &percentage, &interval_ns); + + interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC); + + thread_read_times(thread, &thread_user_time, &thread_system_time); + time_value_add(&thread_total_time, &thread_user_time); + time_value_add(&thread_total_time, &thread_system_time); + + ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei); + + absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns); + usage_percent = (uint32_t) ((balance_ns * 100ULL) / lei.lei_last_refill); - /* XXX code should eventually be a user-exported namespace of resources */ - (void) task_exception_notify(EXC_RESOURCE, code, 0); + /* Show refill period in the same units as balance, limit, etc */ + nanoseconds_to_absolutetime(lei.lei_refill_period, &lei.lei_refill_period); + + limit_percent = (uint32_t) ((lei.lei_limit * 100ULL) / lei.lei_refill_period); + + /* TODO: show task total runtime as well? see TASK_ABSOLUTETIME_INFO */ + + if (disable_exc_resource) { + printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE " + "supressed by a boot-arg\n", procname, pid, tid); + return; + } + + if (audio_active) { + printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE " + "supressed due to audio playback\n", procname, pid, tid); + return; + } + printf("process %s[%d] thread %llu caught burning CPU! " + "It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. " + "thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) " + "ledger info: balance: %lld credit: %lld debit: %lld limit: %llu (%d%%) " + "period: %llu time since last refill (ns): %llu \n", + procname, pid, tid, + percentage, usage_percent, interval_sec, + thread_total_time.seconds, thread_total_time.microseconds, + thread_user_time.seconds, thread_user_time.microseconds, + thread_system_time.seconds, thread_system_time.microseconds, + lei.lei_balance, + lei.lei_credit, lei.lei_debit, + lei.lei_limit, limit_percent, + lei.lei_refill_period, lei.lei_last_refill); + + + code[0] = code[1] = 0; + EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU); + EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR); + EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec); + EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], limit_percent); + EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent); + exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); + + if (fatal) { + task_terminate_internal(task); + } } void @@ -1465,8 +1633,7 @@ init_thread_ledgers(void) { panic("couldn't create cpu_time entry for thread ledger template"); } - if (ledger_set_callback(t, idx, thread_resource_exception, - (void *)(uintptr_t)idx, NULL) < 0) { + if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) { panic("couldn't set thread ledger callback for cpu_time entry"); } @@ -1474,12 +1641,66 @@ init_thread_ledgers(void) { thread_ledger_template = t; } +/* + * Returns currently applied CPU usage limit, or 0/0 if none is applied. + */ +int +thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns) +{ + int64_t abstime = 0; + uint64_t limittime = 0; + thread_t thread = current_thread(); + + *percentage = 0; + *interval_ns = 0; + *action = 0; + + if (thread->t_threadledger == LEDGER_NULL) { + /* + * This thread has no per-thread ledger, so it can't possibly + * have a CPU limit applied. + */ + return (KERN_SUCCESS); + } + + ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns); + ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime); + + if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) { + /* + * This thread's CPU time ledger has no period or limit; so it + * doesn't have a CPU limit applied. + */ + return (KERN_SUCCESS); + } + + /* + * This calculation is the converse to the one in thread_set_cpulimit(). + */ + absolutetime_to_nanoseconds(abstime, &limittime); + *percentage = (limittime * 100ULL) / *interval_ns; + assert(*percentage <= 100); + + if (thread->options & TH_OPT_PROC_CPULIMIT) { + assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0); + + *action = THREAD_CPULIMIT_BLOCK; + } else if (thread->options & TH_OPT_PRVT_CPULIMIT) { + assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0); + + *action = THREAD_CPULIMIT_EXCEPTION; + } else { + *action = THREAD_CPULIMIT_DISABLE; + } + + return (KERN_SUCCESS); +} + /* * Set CPU usage limit on a thread. * * Calling with percentage of 0 will unset the limit for this thread. */ - int thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) { @@ -1490,23 +1711,28 @@ thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) assert(percentage <= 100); - if (percentage == 0) { + if (action == THREAD_CPULIMIT_DISABLE) { /* * Remove CPU limit, if any exists. */ if (thread->t_threadledger != LEDGER_NULL) { + l = thread->t_threadledger; /* * The only way to get a per-thread ledger is via CPU limits. */ assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT)); - ledger_dereference(thread->t_threadledger); - thread->t_threadledger = LEDGER_NULL; + thread->t_threadledger = NULL; + ledger_dereference(l); thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT); } return (0); } + if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) { + return (KERN_INVALID_ARGUMENT); + } + l = thread->t_threadledger; if (l == LEDGER_NULL) { /* @@ -1528,28 +1754,41 @@ thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) */ limittime = (interval_ns * percentage) / 100; nanoseconds_to_absolutetime(limittime, &abstime); - ledger_set_limit(l, thread_ledgers.cpu_time, abstime); + ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct); /* * Refill the thread's allotted CPU time every interval_ns nanoseconds. */ ledger_set_period(l, thread_ledgers.cpu_time, interval_ns); - /* - * Ledgers supports multiple actions for one ledger entry, so we do too. - */ if (action == THREAD_CPULIMIT_EXCEPTION) { + /* + * We don't support programming the CPU usage monitor on a task if any of its + * threads have a per-thread blocking CPU limit configured. + */ + if (thread->options & TH_OPT_PRVT_CPULIMIT) { + panic("CPU usage monitor activated, but blocking thread limit exists"); + } + + /* + * Make a note that this thread's CPU limit is being used for the task-wide CPU + * usage monitor. We don't have to arm the callback which will trigger the + * exception, because that was done for us in ledger_instantiate (because the + * ledger template used has a default callback). + */ thread->options |= TH_OPT_PROC_CPULIMIT; - ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_EXCEPTION); - } + } else { + /* + * We deliberately override any CPU limit imposed by a task-wide limit (eg + * CPU usage monitor). + */ + thread->options &= ~TH_OPT_PROC_CPULIMIT; - if (action == THREAD_CPULIMIT_BLOCK) { thread->options |= TH_OPT_PRVT_CPULIMIT; /* The per-thread ledger template by default has a callback for CPU time */ ledger_disable_callback(l, thread_ledgers.cpu_time); ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK); } - thread->t_threadledger = l; return (0); } @@ -1703,12 +1942,10 @@ thread_tid( return (thread != THREAD_NULL? thread->thread_id: 0); } -uint16_t -thread_set_tag(thread_t th, uint16_t tag) { +uint16_t thread_set_tag(thread_t th, uint16_t tag) { return thread_set_tag_internal(th, tag); } -uint16_t -thread_get_tag(thread_t th) { +uint16_t thread_get_tag(thread_t th) { return thread_get_tag_internal(th); } @@ -1851,10 +2088,22 @@ vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover) void dtrace_thread_bootstrap(void) { task_t task = current_task(); - if(task->thread_count == 1) { + + if (task->thread_count == 1) { + thread_t thread = current_thread(); + if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) { + thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS; + DTRACE_PROC(exec__success); + } DTRACE_PROC(start); } DTRACE_PROC(lwp__start); } + +void +dtrace_thread_didexec(thread_t thread) +{ + thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS; +} #endif /* CONFIG_DTRACE */