X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3e170ce000f1506b7b5d2c5c7faec85ceabb573d..94ff46dc2849db4d43eaaf144872decc522aafb4:/osfmk/corpses/corpse.c diff --git a/osfmk/corpses/corpse.c b/osfmk/corpses/corpse.c index 27a0c13f5..a3d283be4 100644 --- a/osfmk/corpses/corpse.c +++ b/osfmk/corpses/corpse.c @@ -30,17 +30,17 @@ /* * Corpses Overview * ================ - * + * * A corpse is a state of process that is past the point of its death. This means that process has * completed all its termination operations like releasing file descriptors, mach ports, sockets and * other constructs used to identify a process. For all the processes this mimics the behavior as if * the process has died and no longer available by any means. - * + * * Why do we need Corpses? * ----------------------- * For crash inspection we need to inspect the state and data that is associated with process so that * crash reporting infrastructure can build backtraces, find leaks etc. For example a crash - * + * * Corpses functionality in kernel * =============================== * The corpse functionality is an extension of existing exception reporting mechanisms we have. The @@ -49,7 +49,7 @@ * notification the exception is not handled, then the process begins the death operations and during * proc_prepareexit, we decide to create a corpse for inspection. Following is a sample run through * of events and data shuffling that happens when corpses is enabled. - * + * * * a process causes an exception during normal execution of threads. * * The exception generated by either mach(e.g GUARDED_MARCHPORT) or bsd(eg SIGABORT, GUARDED_FD * etc) side is passed through the exception_triage() function to follow the thread -> task -> host @@ -78,8 +78,8 @@ * inspection flag set are just bounced to another holding queue (crashed_threads_queue). * Only after the corpse notification these are pulled out from holding queue and enqueued * back to termination queue - * - * + * + * * Corpse info format * ================== * The kernel (task_mark_corpse()) makes a vm allocation in the dead task's vm space (with tag @@ -88,7 +88,7 @@ * * bsd proc exit path may write down pid, parent pid, number of file descriptors etc * * mach side may append data regarding ledger usage, memory stats etc * See detailed info about the memory structure and format in kern_cdata.h documentation. - * + * * Configuring Corpses functionality * ================================= * boot-arg: -no_corpses disables the corpse generation. This can be added/removed without affecting @@ -98,7 +98,7 @@ * by system. * CORPSEINFO_ALLOCATION_SIZE: is the default size of vm allocation. If in future there is much more * data to be put in, then please re-tune this parameter. - * + * * Debugging/Visibility * ==================== * * lldbmacros for thread and task summary are updated to show "C" flag for corpse task/threads. @@ -106,16 +106,17 @@ * and holding queue (dumpcrashed_thread_queue). * * In case of corpse creation is disabled of ignored then the system log is updated with * printf data with reason. - * + * * Limitations of Corpses * ====================== * With holding off memory for inspection, it creates vm pressure which might not be desirable * on low memory devices. There are limits to max corpses being inspected at a time which is * marked by TOTAL_CORPSES_ALLOWED. - * + * */ +#include #include #include #include @@ -128,85 +129,195 @@ #include #include #include +#include -unsigned long total_corpses_count = 0; +#if CONFIG_MACF +#include +#endif + +/* + * Exported interfaces + */ +#include + +union corpse_creation_gate { + struct { + uint16_t user_faults; + uint16_t corpses; + }; + uint32_t value; +}; + +static _Atomic uint32_t inflight_corpses; unsigned long total_corpses_created = 0; boolean_t corpse_enabled_config = TRUE; -kcdata_descriptor_t task_get_corpseinfo(task_t task); -kcdata_descriptor_t task_crashinfo_alloc_init(mach_vm_address_t crash_data_p, unsigned size); -kern_return_t task_crashinfo_destroy(kcdata_descriptor_t data); -static kern_return_t task_crashinfo_get_ref(); -static kern_return_t task_crashinfo_release_ref(); +/* bootarg to generate corpse with size up to max_footprint_mb */ +boolean_t corpse_threshold_system_limit = FALSE; +/* bootarg to turn on corpse forking for EXC_RESOURCE */ +int exc_via_corpse_forking = 1; + +/* bootarg to generate corpse for fatal high memory watermark violation */ +int corpse_for_fatal_memkill = 1; + +#ifdef __arm__ +static inline int +IS_64BIT_PROCESS(__unused void *p) +{ + return 0; +} +#else +extern int IS_64BIT_PROCESS(void *); +#endif /* __arm__ */ +extern void gather_populate_corpse_crashinfo(void *p, task_t task, + mach_exception_data_type_t code, mach_exception_data_type_t subcode, + uint64_t *udata_buffer, int num_udata, void *reason); +extern void *proc_find(int pid); +extern int proc_rele(void *p); -void corpses_init(){ +void +corpses_init() +{ char temp_buf[20]; + int exc_corpse_forking; + int fatal_memkill; if (PE_parse_boot_argn("-no_corpses", temp_buf, sizeof(temp_buf))) { corpse_enabled_config = FALSE; } + if (PE_parse_boot_argn("exc_via_corpse_forking", &exc_corpse_forking, sizeof(exc_corpse_forking))) { + exc_via_corpse_forking = exc_corpse_forking; + } + if (PE_parse_boot_argn("corpse_for_fatal_memkill", &fatal_memkill, sizeof(fatal_memkill))) { + corpse_for_fatal_memkill = fatal_memkill; + } +#if DEBUG || DEVELOPMENT + if (PE_parse_boot_argn("-corpse_threshold_system_limit", &corpse_threshold_system_limit, sizeof(corpse_threshold_system_limit))) { + corpse_threshold_system_limit = TRUE; + } +#endif /* DEBUG || DEVELOPMENT */ } /* * Routine: corpses_enabled * returns FALSE if not enabled */ -boolean_t corpses_enabled() +boolean_t +corpses_enabled() { return corpse_enabled_config; } +unsigned long +total_corpses_count(void) +{ + union corpse_creation_gate gate; + + gate.value = atomic_load_explicit(&inflight_corpses, memory_order_relaxed); + return gate.corpses; +} + /* * Routine: task_crashinfo_get_ref() * Grab a slot at creating a corpse. * Returns: KERN_SUCCESS if the policy allows for creating a corpse. */ -kern_return_t task_crashinfo_get_ref() +static kern_return_t +task_crashinfo_get_ref(uint16_t kcd_u_flags) { - unsigned long counter = total_corpses_count; - counter = OSIncrementAtomic((SInt32 *)&total_corpses_count); - if (counter >= TOTAL_CORPSES_ALLOWED) { - OSDecrementAtomic((SInt32 *)&total_corpses_count); - return KERN_RESOURCE_SHORTAGE; + union corpse_creation_gate oldgate, newgate; + + assert(kcd_u_flags & CORPSE_CRASHINFO_HAS_REF); + + oldgate.value = atomic_load_explicit(&inflight_corpses, memory_order_relaxed); + for (;;) { + newgate = oldgate; + if (kcd_u_flags & CORPSE_CRASHINFO_USER_FAULT) { + if (newgate.user_faults++ >= TOTAL_USER_FAULTS_ALLOWED) { + return KERN_RESOURCE_SHORTAGE; + } + } + if (newgate.corpses++ >= TOTAL_CORPSES_ALLOWED) { + return KERN_RESOURCE_SHORTAGE; + } + + // this reloads the value in oldgate + if (atomic_compare_exchange_strong_explicit(&inflight_corpses, + &oldgate.value, newgate.value, memory_order_relaxed, + memory_order_relaxed)) { + return KERN_SUCCESS; + } } - OSIncrementAtomicLong((volatile long *)&total_corpses_created); - return KERN_SUCCESS; } /* * Routine: task_crashinfo_release_ref * release the slot for corpse being used. */ -kern_return_t task_crashinfo_release_ref() +static kern_return_t +task_crashinfo_release_ref(uint16_t kcd_u_flags) { - unsigned long __assert_only counter; - counter = OSDecrementAtomic((SInt32 *)&total_corpses_count); - assert(counter > 0); - return KERN_SUCCESS; + union corpse_creation_gate oldgate, newgate; + + assert(kcd_u_flags & CORPSE_CRASHINFO_HAS_REF); + + oldgate.value = atomic_load_explicit(&inflight_corpses, memory_order_relaxed); + for (;;) { + newgate = oldgate; + if (kcd_u_flags & CORPSE_CRASHINFO_USER_FAULT) { + if (newgate.user_faults-- == 0) { + panic("corpse in flight count over-release"); + } + } + if (newgate.corpses-- == 0) { + panic("corpse in flight count over-release"); + } + // this reloads the value in oldgate + if (atomic_compare_exchange_strong_explicit(&inflight_corpses, + &oldgate.value, newgate.value, memory_order_relaxed, + memory_order_relaxed)) { + return KERN_SUCCESS; + } + } } -kcdata_descriptor_t task_crashinfo_alloc_init(mach_vm_address_t crash_data_p, unsigned size) +kcdata_descriptor_t +task_crashinfo_alloc_init(mach_vm_address_t crash_data_p, unsigned size, + uint32_t kc_u_flags, unsigned kc_flags) { - if(KERN_SUCCESS != task_crashinfo_get_ref()) { - return NULL; + kcdata_descriptor_t kcdata; + + if (kc_u_flags & CORPSE_CRASHINFO_HAS_REF) { + if (KERN_SUCCESS != task_crashinfo_get_ref(kc_u_flags)) { + return NULL; + } } - return kcdata_memory_alloc_init(crash_data_p, TASK_CRASHINFO_BEGIN, size, KCFLAG_USE_COPYOUT); + kcdata = kcdata_memory_alloc_init(crash_data_p, TASK_CRASHINFO_BEGIN, size, + kc_flags); + if (kcdata) { + kcdata->kcd_user_flags = kc_u_flags; + } else if (kc_u_flags & CORPSE_CRASHINFO_HAS_REF) { + task_crashinfo_release_ref(kc_u_flags); + } + return kcdata; } /* * Free up the memory associated with task_crashinfo_data */ -kern_return_t task_crashinfo_destroy(kcdata_descriptor_t data) +kern_return_t +task_crashinfo_destroy(kcdata_descriptor_t data) { if (!data) { return KERN_INVALID_ARGUMENT; } - - task_crashinfo_release_ref(); + if (data->kcd_user_flags & CORPSE_CRASHINFO_HAS_REF) { + task_crashinfo_release_ref(data->kcd_user_flags); + } return kcdata_memory_destroy(data); } @@ -216,13 +327,431 @@ kern_return_t task_crashinfo_destroy(kcdata_descriptor_t data) * returns: crash info data attached to task. * NULL if task is null or has no corpse info */ -kcdata_descriptor_t task_get_corpseinfo(task_t task) +kcdata_descriptor_t +task_get_corpseinfo(task_t task) { kcdata_descriptor_t retval = NULL; - if (task != NULL){ + if (task != NULL) { retval = task->corpse_info; } return retval; } +/* + * Routine: task_add_to_corpse_task_list + * params: task - task to be added to corpse task list + * returns: None. + */ +void +task_add_to_corpse_task_list(task_t corpse_task) +{ + lck_mtx_lock(&tasks_corpse_lock); + queue_enter(&corpse_tasks, corpse_task, task_t, corpse_tasks); + lck_mtx_unlock(&tasks_corpse_lock); +} + +/* + * Routine: task_remove_from_corpse_task_list + * params: task - task to be removed from corpse task list + * returns: None. + */ +void +task_remove_from_corpse_task_list(task_t corpse_task) +{ + lck_mtx_lock(&tasks_corpse_lock); + queue_remove(&corpse_tasks, corpse_task, task_t, corpse_tasks); + lck_mtx_unlock(&tasks_corpse_lock); +} + +/* + * Routine: task_purge_all_corpses + * params: None. + * returns: None. + */ +void +task_purge_all_corpses(void) +{ + task_t task; + + printf("Purging corpses......\n\n"); + + lck_mtx_lock(&tasks_corpse_lock); + /* Iterate through all the corpse tasks and clear all map entries */ + queue_iterate(&corpse_tasks, task, task_t, corpse_tasks) { + vm_map_remove(task->map, + task->map->min_offset, + task->map->max_offset, + /* + * Final cleanup: + * + no unnesting + * + remove immutable mappings + * + allow gaps in the range + */ + (VM_MAP_REMOVE_NO_UNNESTING | + VM_MAP_REMOVE_IMMUTABLE | + VM_MAP_REMOVE_GAPS_OK)); + } + + lck_mtx_unlock(&tasks_corpse_lock); +} + +/* + * Routine: task_generate_corpse + * params: task - task to fork a corpse + * corpse_task - task port of the generated corpse + * returns: KERN_SUCCESS on Success. + * KERN_FAILURE on Failure. + * KERN_NOT_SUPPORTED on corpse disabled. + * KERN_RESOURCE_SHORTAGE on memory alloc failure or reaching max corpse. + */ +kern_return_t +task_generate_corpse( + task_t task, + ipc_port_t *corpse_task_port) +{ + task_t new_task; + kern_return_t kr; + thread_t thread, th_iter; + ipc_port_t corpse_port; + ipc_port_t old_notify; + + if (task == kernel_task || task == TASK_NULL) { + return KERN_INVALID_ARGUMENT; + } + + task_lock(task); + if (task_is_a_corpse_fork(task)) { + task_unlock(task); + return KERN_INVALID_ARGUMENT; + } + task_unlock(task); + + /* Generate a corpse for the given task, will return with a ref on corpse task */ + kr = task_generate_corpse_internal(task, &new_task, &thread, 0, 0, 0, NULL); + if (kr != KERN_SUCCESS) { + return kr; + } + if (thread != THREAD_NULL) { + thread_deallocate(thread); + } + + /* wait for all the threads in the task to terminate */ + task_lock(new_task); + task_wait_till_threads_terminate_locked(new_task); + + /* Reset thread ports of all the threads in task */ + queue_iterate(&new_task->threads, th_iter, thread_t, task_threads) + { + /* Do not reset the thread port for inactive threads */ + if (th_iter->corpse_dup == FALSE) { + ipc_thread_reset(th_iter); + } + } + task_unlock(new_task); + + /* transfer the task ref to port and arm the no-senders notification */ + corpse_port = convert_task_to_port(new_task); + assert(IP_NULL != corpse_port); + + ip_lock(corpse_port); + require_ip_active(corpse_port); + ipc_port_nsrequest(corpse_port, corpse_port->ip_mscount, ipc_port_make_sonce_locked(corpse_port), &old_notify); + /* port unlocked */ + + assert(IP_NULL == old_notify); + *corpse_task_port = corpse_port; + return KERN_SUCCESS; +} + +/* + * Routine: task_enqueue_exception_with_corpse + * params: task - task to generate a corpse and enqueue it + * etype - EXC_RESOURCE or EXC_GUARD + * code - exception code to be enqueued + * codeCnt - code array count - code and subcode + * + * returns: KERN_SUCCESS on Success. + * KERN_FAILURE on Failure. + * KERN_INVALID_ARGUMENT on invalid arguments passed. + * KERN_NOT_SUPPORTED on corpse disabled. + * KERN_RESOURCE_SHORTAGE on memory alloc failure or reaching max corpse. + */ +kern_return_t +task_enqueue_exception_with_corpse( + task_t task, + exception_type_t etype, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + void *reason) +{ + task_t new_task = TASK_NULL; + thread_t thread = THREAD_NULL; + kern_return_t kr; + + if (codeCnt < 2) { + return KERN_INVALID_ARGUMENT; + } + + /* Generate a corpse for the given task, will return with a ref on corpse task */ + kr = task_generate_corpse_internal(task, &new_task, &thread, + etype, code[0], code[1], reason); + if (kr == KERN_SUCCESS) { + if (thread == THREAD_NULL) { + return KERN_FAILURE; + } + assert(new_task != TASK_NULL); + assert(etype == EXC_RESOURCE || etype == EXC_GUARD); + thread_exception_enqueue(new_task, thread, etype); + } + return kr; +} + +/* + * Routine: task_generate_corpse_internal + * params: task - task to fork a corpse + * corpse_task - task of the generated corpse + * exc_thread - equivalent thread in corpse enqueuing exception + * etype - EXC_RESOURCE or EXC_GUARD or 0 + * code - mach exception code to be passed in corpse blob + * subcode - mach exception subcode to be passed in corpse blob + * returns: KERN_SUCCESS on Success. + * KERN_FAILURE on Failure. + * KERN_NOT_SUPPORTED on corpse disabled. + * KERN_RESOURCE_SHORTAGE on memory alloc failure or reaching max corpse. + */ +kern_return_t +task_generate_corpse_internal( + task_t task, + task_t *corpse_task, + thread_t *exc_thread, + exception_type_t etype, + mach_exception_data_type_t code, + mach_exception_data_type_t subcode, + void *reason) +{ + task_t new_task = TASK_NULL; + thread_t thread = THREAD_NULL; + thread_t thread_next = THREAD_NULL; + kern_return_t kr; + struct proc *p = NULL; + int is_64bit_addr; + int is_64bit_data; + int t_flags; + uint64_t *udata_buffer = NULL; + int size = 0; + int num_udata = 0; + uint16_t kc_u_flags = CORPSE_CRASHINFO_HAS_REF; + +#if CONFIG_MACF + struct label *label = NULL; +#endif + + if (!corpses_enabled()) { + return KERN_NOT_SUPPORTED; + } + + if (etype == EXC_GUARD && EXC_GUARD_DECODE_GUARD_TYPE(code) == GUARD_TYPE_USER) { + kc_u_flags |= CORPSE_CRASHINFO_USER_FAULT; + } + + kr = task_crashinfo_get_ref(kc_u_flags); + if (kr != KERN_SUCCESS) { + return kr; + } + + /* Having a task reference does not guarantee a proc reference */ + p = proc_find(task_pid(task)); + if (p == NULL) { + kr = KERN_INVALID_TASK; + goto error_task_generate_corpse; + } + + is_64bit_addr = IS_64BIT_PROCESS(p); + is_64bit_data = (task == TASK_NULL) ? is_64bit_addr : task_get_64bit_data(task); + t_flags = TF_CORPSE_FORK | + TF_PENDING_CORPSE | + TF_CORPSE | + (is_64bit_addr ? TF_64B_ADDR : TF_NONE) | + (is_64bit_data ? TF_64B_DATA : TF_NONE); + +#if CONFIG_MACF + /* Create the corpse label credentials from the process. */ + label = mac_exc_create_label_for_proc(p); +#endif + + /* Create a task for corpse */ + kr = task_create_internal(task, + NULL, + TRUE, + is_64bit_addr, + is_64bit_data, + t_flags, + TPF_NONE, + TWF_NONE, + &new_task); + if (kr != KERN_SUCCESS) { + goto error_task_generate_corpse; + } + + /* Create and copy threads from task, returns a ref to thread */ + kr = task_duplicate_map_and_threads(task, p, new_task, &thread, + &udata_buffer, &size, &num_udata); + if (kr != KERN_SUCCESS) { + goto error_task_generate_corpse; + } + + kr = task_collect_crash_info(new_task, +#if CONFIG_MACF + label, +#endif + TRUE); + if (kr != KERN_SUCCESS) { + goto error_task_generate_corpse; + } + + /* transfer our references to the corpse info */ + assert(new_task->corpse_info->kcd_user_flags == 0); + new_task->corpse_info->kcd_user_flags = kc_u_flags; + kc_u_flags = 0; + + kr = task_start_halt(new_task); + if (kr != KERN_SUCCESS) { + goto error_task_generate_corpse; + } + + /* terminate the ipc space */ + ipc_space_terminate(new_task->itk_space); + + /* Populate the corpse blob, use the proc struct of task instead of corpse task */ + gather_populate_corpse_crashinfo(p, new_task, + code, subcode, udata_buffer, num_udata, reason); + + /* Add it to global corpse task list */ + task_add_to_corpse_task_list(new_task); + + *corpse_task = new_task; + *exc_thread = thread; + +error_task_generate_corpse: +#if CONFIG_MACF + if (label) { + mac_exc_free_label(label); + } +#endif + + /* Release the proc reference */ + if (p != NULL) { + proc_rele(p); + } + + if (kr != KERN_SUCCESS) { + if (thread != THREAD_NULL) { + thread_deallocate(thread); + } + if (new_task != TASK_NULL) { + task_lock(new_task); + /* Terminate all the other threads in the task. */ + queue_iterate(&new_task->threads, thread_next, thread_t, task_threads) + { + thread_terminate_internal(thread_next); + } + /* wait for all the threads in the task to terminate */ + task_wait_till_threads_terminate_locked(new_task); + task_unlock(new_task); + + task_clear_corpse(new_task); + task_terminate_internal(new_task); + task_deallocate(new_task); + } + if (kc_u_flags) { + task_crashinfo_release_ref(kc_u_flags); + } + } + /* Free the udata buffer allocated in task_duplicate_map_and_threads */ + if (udata_buffer != NULL) { + kfree(udata_buffer, size); + } + + return kr; +} + +/* + * Routine: task_map_corpse_info + * params: task - Map the corpse info in task's address space + * corpse_task - task port of the corpse + * kcd_addr_begin - address of the mapped corpse info + * kcd_addr_begin - size of the mapped corpse info + * returns: KERN_SUCCESS on Success. + * KERN_FAILURE on Failure. + * KERN_INVALID_ARGUMENT on invalid arguments. + * Note: Temporary function, will be deleted soon. + */ +kern_return_t +task_map_corpse_info( + task_t task, + task_t corpse_task, + vm_address_t *kcd_addr_begin, + uint32_t *kcd_size) +{ + kern_return_t kr; + mach_vm_address_t kcd_addr_begin_64; + mach_vm_size_t size_64; + + kr = task_map_corpse_info_64(task, corpse_task, &kcd_addr_begin_64, &size_64); + if (kr != KERN_SUCCESS) { + return kr; + } + + *kcd_addr_begin = (vm_address_t)kcd_addr_begin_64; + *kcd_size = (uint32_t) size_64; + return KERN_SUCCESS; +} + +/* + * Routine: task_map_corpse_info_64 + * params: task - Map the corpse info in task's address space + * corpse_task - task port of the corpse + * kcd_addr_begin - address of the mapped corpse info (takes mach_vm_addess_t *) + * kcd_addr_begin - size of the mapped corpse info (takes mach_vm_size_t *) + * returns: KERN_SUCCESS on Success. + * KERN_FAILURE on Failure. + * KERN_INVALID_ARGUMENT on invalid arguments. + */ +kern_return_t +task_map_corpse_info_64( + task_t task, + task_t corpse_task, + mach_vm_address_t *kcd_addr_begin, + mach_vm_size_t *kcd_size) +{ + kern_return_t kr; + mach_vm_offset_t crash_data_ptr = 0; + mach_vm_size_t size = CORPSEINFO_ALLOCATION_SIZE; + void *corpse_info_kernel = NULL; + + if (task == TASK_NULL || task_is_a_corpse_fork(task)) { + return KERN_INVALID_ARGUMENT; + } + if (corpse_task == TASK_NULL || !task_is_a_corpse(corpse_task) || + kcdata_memory_get_begin_addr(corpse_task->corpse_info) == NULL) { + return KERN_INVALID_ARGUMENT; + } + corpse_info_kernel = kcdata_memory_get_begin_addr(corpse_task->corpse_info); + kr = mach_vm_allocate_kernel(task->map, &crash_data_ptr, size, + VM_FLAGS_ANYWHERE, VM_MEMORY_CORPSEINFO); + if (kr != KERN_SUCCESS) { + return kr; + } + copyout(corpse_info_kernel, crash_data_ptr, size); + *kcd_addr_begin = crash_data_ptr; + *kcd_size = size; + + return KERN_SUCCESS; +} + +uint64_t +task_corpse_get_crashed_thread_id(task_t corpse_task) +{ + return corpse_task->crashed_thread_id; +}