X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5ba3f43ea354af8ad55bea84372a2bc834d8757c..b226f5e54a60dc81db17b1260381d7dbfea3cdf1:/osfmk/kern/task.c diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index c2a29291e..792daf7bc 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -161,6 +161,7 @@ #include #include +#include #if CONFIG_ATM #include @@ -184,6 +185,7 @@ lck_grp_attr_t task_lck_grp_attr; extern int exc_via_corpse_forking; extern int corpse_for_fatal_memkill; +extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p); /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */ int audio_active = 0; @@ -197,15 +199,39 @@ lck_spin_t dead_task_statistics_lock; ledger_template_t task_ledger_template = NULL; -struct _task_ledger_indices task_ledgers __attribute__((used)) = - {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) = +{.cpu_time = -1, + .tkm_private = -1, + .tkm_shared = -1, + .phys_mem = -1, + .wired_mem = -1, + .internal = -1, + .iokit_mapped = -1, + .alternate_accounting = -1, + .alternate_accounting_compressed = -1, + .page_table = -1, + .phys_footprint = -1, + .internal_compressed = -1, + .purgeable_volatile = -1, + .purgeable_nonvolatile = -1, + .purgeable_volatile_compressed = -1, + .purgeable_nonvolatile_compressed = -1, + .network_volatile = -1, + .network_nonvolatile = -1, + .network_volatile_compressed = -1, + .network_nonvolatile_compressed = -1, + .platform_idle_wakeups = -1, + .interrupt_wakeups = -1, #if !CONFIG_EMBEDDED - { 0 /* initialized at runtime */}, + .sfi_wait_times = { 0 /* initialized at runtime */}, #endif /* !CONFIG_EMBEDDED */ - -1, -1, - -1, -1, - -1, -1, - }; + .cpu_time_billed_to_me = -1, + .cpu_time_billed_to_others = -1, + .physical_writes = -1, + .logical_writes = -1, + .energy_billed_to_me = -1, + .energy_billed_to_others = -1 +}; /* System sleep state */ boolean_t tasks_suspend_state; @@ -266,8 +292,11 @@ int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated int64_t global_logical_writes_count = 0; /* Global count for logical writes */ static boolean_t global_update_logical_writes(int64_t); +#define TASK_MAX_THREAD_LIMIT 256 + #if MACH_ASSERT int pmap_ledgers_panic = 1; +int pmap_ledgers_panic_leeway = 3; #endif /* MACH_ASSERT */ int task_max = CONFIG_TASK_MAX; /* Max number of tasks */ @@ -280,19 +309,38 @@ int hwm_user_cores = 0; /* high watermark violations generate user core files */ extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long); extern int proc_pid(struct proc *p); extern int proc_selfpid(void); +extern struct proc *current_proc(void); extern char *proc_name_address(struct proc *p); extern uint64_t get_dispatchqueue_offset_from_proc(void *); extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize); +extern void workq_proc_suspended(struct proc *p); +extern void workq_proc_resumed(struct proc *p); #if CONFIG_MEMORYSTATUS extern void proc_memstat_terminated(struct proc* p, boolean_t set); extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal); extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal); -extern boolean_t memorystatus_allowed_vm_map_fork(__unused task_t task); +extern boolean_t memorystatus_allowed_vm_map_fork(task_t task); + +#if 
DEVELOPMENT || DEBUG +extern void memorystatus_abort_vm_map_fork(task_t); +#endif + #endif /* CONFIG_MEMORYSTATUS */ #endif /* MACH_BSD */ +#if DEVELOPMENT || DEBUG +int exc_resource_threads_enabled; +#endif /* DEVELOPMENT || DEBUG */ + +#if (DEVELOPMENT || DEBUG) && TASK_EXC_GUARD_DELIVER_CORPSE +uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE | + TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE; +#else +uint32_t task_exc_guard_default = 0; +#endif + /* Forwards */ static void task_hold_locked(task_t task); @@ -301,21 +349,12 @@ static void task_release_locked(task_t task); static void task_synchronizer_destroy_all(task_t task); -void -task_backing_store_privileged( - task_t task) -{ - task_lock(task); - task->priv_flags |= VM_BACKING_STORE_PRIV; - task_unlock(task); - return; -} - void task_set_64bit( task_t task, - boolean_t is64bit) + boolean_t is_64bit, + boolean_t is_64bit_data) { #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) thread_t thread; @@ -323,15 +362,34 @@ task_set_64bit( task_lock(task); - if (is64bit) { - if (task_has_64BitAddr(task)) + /* + * Switching to/from 64-bit address spaces + */ + if (is_64bit) { + if (!task_has_64Bit_addr(task)) { + task_set_64Bit_addr(task); + } + } else { + if (task_has_64Bit_addr(task)) { + task_clear_64Bit_addr(task); + } + } + + /* + * Switching to/from 64-bit register state. + */ + if (is_64bit_data) { + if (task_has_64Bit_data(task)) goto out; - task_set_64BitAddr(task); + + task_set_64Bit_data(task); } else { - if ( !task_has_64BitAddr(task)) + if ( !task_has_64Bit_data(task)) goto out; - task_clear_64BitAddr(task); + + task_clear_64Bit_data(task); } + /* FIXME: On x86, the thread save state flavor can diverge from the * task's 64-bit feature flag due to the 32-bit/64-bit register save * state dichotomy. Since we can be pre-empted in this interval, @@ -376,6 +434,12 @@ out: task_unlock(task); } +boolean_t +task_get_64bit_data(task_t task) +{ + return task_has_64Bit_data(task); +} + void task_set_platform_binary( task_t task, @@ -390,6 +454,30 @@ task_set_platform_binary( task_unlock(task); } +/* + * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument. + * Returns "false" if flag is already set, and "true" in other cases. + */ +bool +task_set_ca_client_wi( + task_t task, + boolean_t set_or_clear) +{ + bool ret = true; + task_lock(task); + if (set_or_clear) { + /* Tasks can have only one CA_CLIENT work interval */ + if (task->t_flags & TF_CA_CLIENT_WI) + ret = false; + else + task->t_flags |= TF_CA_CLIENT_WI; + } else { + task->t_flags &= ~TF_CA_CLIENT_WI; + } + task_unlock(task); + return ret; +} + void task_set_dyld_info( task_t task, @@ -475,7 +563,7 @@ task_clear_return_wait(task_t task) task_unlock(task); } -void +void __attribute__((noreturn)) task_wait_to_return(void) { task_t task; @@ -497,9 +585,36 @@ task_wait_to_return(void) task_unlock(task); +#if CONFIG_MACF + /* + * Before jumping to userspace and allowing this process to execute any code, + * notify any interested parties. 
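+	 * Under CONFIG_MACF the interested parties are MAC policy modules:
+	 * they receive their exec-complete notification here, after the new
+	 * image is fully set up but before its first instruction runs.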
+ */ + mac_proc_notify_exec_complete(current_proc()); +#endif + thread_bootstrap_return(); } +#ifdef CONFIG_32BIT_TELEMETRY +boolean_t +task_consume_32bit_log_flag(task_t task) +{ + if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) { + task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY; + return TRUE; + } else { + return FALSE; + } +} + +void +task_set_32bit_log_flag(task_t task) +{ + task->t_procflags |= TPF_LOG_32BIT_TELEMETRY; +} +#endif /* CONFIG_32BIT_TELEMETRY */ + boolean_t task_is_exec_copy(task_t task) { @@ -540,24 +655,24 @@ task_reference_internal(task_t task) void * bt[TASK_REF_BTDEPTH]; int numsaved = 0; + os_ref_retain(&task->ref_count); + numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH); - - (void)hw_atomic_add(&(task)->ref_count, 1); btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR, bt, numsaved); } -uint32_t +os_ref_count_t task_deallocate_internal(task_t task) { void * bt[TASK_REF_BTDEPTH]; int numsaved = 0; numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH); - btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR, bt, numsaved); - return hw_atomic_sub(&(task)->ref_count, 1); + + return os_ref_release(&task->ref_count); } #endif /* TASK_REFERENCE_LEAK_DEBUG */ @@ -654,10 +769,16 @@ task_init(void) #endif /* CONFIG_MEMORYSTATUS */ } -#if MACH_ASSERT - PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic, - sizeof (pmap_ledgers_panic)); -#endif /* MACH_ASSERT */ +#if DEVELOPMENT || DEBUG + if (!PE_parse_boot_argn("exc_resource_threads", + &exc_resource_threads_enabled, + sizeof(exc_resource_threads_enabled))) { + exc_resource_threads_enabled = 1; + } + PE_parse_boot_argn("task_exc_guard_default", + &task_exc_guard_default, + sizeof(task_exc_guard_default)); +#endif /* DEVELOPMENT || DEBUG */ #if CONFIG_COREDUMP if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores, @@ -718,9 +839,9 @@ task_init(void) * Create the kernel task as the first task. 
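+	 * (The extra boolean passed below is the new is_64bit_data
+	 * argument: on __LP64__ the kernel task gets both a 64-bit
+	 * address space and 64-bit register state.)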
*/ #ifdef __LP64__ - if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) #else - if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) #endif panic("task_init\n"); @@ -826,8 +947,12 @@ init_task_ledgers(void) assert(kernel_task == TASK_NULL); #if MACH_ASSERT - PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic, + PE_parse_boot_argn("pmap_ledgers_panic", + &pmap_ledgers_panic, sizeof (pmap_ledgers_panic)); + PE_parse_boot_argn("pmap_ledgers_panic_leeway", + &pmap_ledgers_panic_leeway, + sizeof (pmap_ledgers_panic_leeway)); #endif /* MACH_ASSERT */ if ((t = ledger_template_create("Per-task ledger")) == NULL) @@ -860,6 +985,12 @@ init_task_ledgers(void) task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes"); task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes"); task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes"); + + task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes"); + task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes"); + task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes"); + task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes"); + task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power", "count"); task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power", @@ -917,6 +1048,10 @@ init_task_ledgers(void) (task_ledgers.purgeable_nonvolatile < 0) || (task_ledgers.purgeable_volatile_compressed < 0) || (task_ledgers.purgeable_nonvolatile_compressed < 0) || + (task_ledgers.network_volatile < 0) || + (task_ledgers.network_nonvolatile < 0) || + (task_ledgers.network_volatile_compressed < 0) || + (task_ledgers.network_nonvolatile_compressed < 0) || (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0) || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) || @@ -940,6 +1075,11 @@ init_task_ledgers(void) ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed); ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed); + ledger_track_credit_only(t, task_ledgers.network_volatile); + ledger_track_credit_only(t, task_ledgers.network_nonvolatile); + ledger_track_credit_only(t, task_ledgers.network_volatile_compressed); + ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed); + ledger_track_maximum(t, task_ledgers.phys_footprint, 60); #if MACH_ASSERT if (pmap_ledgers_panic) { @@ -954,6 +1094,11 @@ init_task_ledgers(void) ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile); ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed); ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed); + + ledger_panic_on_negative(t, task_ledgers.network_volatile); + ledger_panic_on_negative(t, task_ledgers.network_nonvolatile); + ledger_panic_on_negative(t, 
task_ledgers.network_volatile_compressed); + ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed); } #endif /* MACH_ASSERT */ @@ -970,12 +1115,15 @@ init_task_ledgers(void) task_ledger_template = t; } +os_refgrp_decl(static, task_refgrp, "task", NULL); + kern_return_t task_create_internal( task_t parent_task, coalition_t *parent_coalitions __unused, boolean_t inherit_memory, __unused boolean_t is_64bit, + boolean_t is_64bit_data, uint32_t t_flags, uint32_t t_procflags, task_t *child_task) /* OUT */ @@ -990,7 +1138,7 @@ task_create_internal( return(KERN_RESOURCE_SHORTAGE); /* one ref for just being alive; one for our caller */ - new_task->ref_count = 2; + os_ref_init_count(&new_task->ref_count, &task_refgrp, 2); /* allocate with active entries */ assert(task_ledger_template != NULL); @@ -1028,7 +1176,6 @@ task_create_internal( new_task->legacy_stop_count = 0; new_task->active = TRUE; new_task->halting = FALSE; - new_task->user_data = NULL; new_task->priv_flags = 0; new_task->t_flags = t_flags; new_task->t_procflags = t_procflags; @@ -1036,6 +1183,8 @@ task_create_internal( new_task->crashed_thread_id = 0; new_task->exec_token = 0; + new_task->task_exc_guard = task_exc_guard_default; + #if CONFIG_ATM new_task->atm_context = NULL; #endif @@ -1086,7 +1235,7 @@ task_create_internal( new_task->affinity_space = NULL; - new_task->t_chud = 0; + new_task->t_kpc = 0; new_task->pidsuspended = FALSE; new_task->frozen = FALSE; @@ -1112,13 +1261,12 @@ task_create_internal( new_task->mem_notify_reserved = 0; new_task->memlimit_attrs_reserved = 0; -#if IMPORTANCE_INHERITANCE - new_task->task_imp_base = NULL; -#endif /* IMPORTANCE_INHERITANCE */ new_task->requested_policy = default_task_requested_policy; new_task->effective_policy = default_task_effective_policy; + task_importance_init_from_parent(new_task, parent_task); + if (parent_task != TASK_NULL) { new_task->sec_token = parent_task->sec_token; new_task->audit_token = parent_task->audit_token; @@ -1127,8 +1275,14 @@ task_create_internal( shared_region = vm_shared_region_get(parent_task); vm_shared_region_set(new_task, shared_region); - if(task_has_64BitAddr(parent_task)) - task_set_64BitAddr(new_task); + if(task_has_64Bit_addr(parent_task)) { + task_set_64Bit_addr(new_task); + } + + if(task_has_64Bit_data(parent_task)) { + task_set_64Bit_data(new_task); + } + new_task->all_image_info_addr = parent_task->all_image_info_addr; new_task->all_image_info_size = parent_task->all_image_info_size; @@ -1137,43 +1291,6 @@ task_create_internal( new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task); -#if IMPORTANCE_INHERITANCE - ipc_importance_task_t new_task_imp = IIT_NULL; - boolean_t inherit_receive = TRUE; - - if (task_is_marked_importance_donor(parent_task)) { - new_task_imp = ipc_importance_for_task(new_task, FALSE); - assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_donor(new_task_imp, TRUE); - } -#if CONFIG_EMBEDDED - /* Embedded only wants to inherit for exec copy task */ - if ((t_procflags & TPF_EXEC_COPY) == 0) { - inherit_receive = FALSE; - } -#endif /* CONFIG_EMBEDDED */ - - if (inherit_receive) { - if (task_is_marked_importance_receiver(parent_task)) { - if (IIT_NULL == new_task_imp) - new_task_imp = ipc_importance_for_task(new_task, FALSE); - assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_receiver(new_task_imp, TRUE); - } - if (task_is_marked_importance_denap_receiver(parent_task)) { - if (IIT_NULL == new_task_imp) - new_task_imp = ipc_importance_for_task(new_task, FALSE); - 
assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE); - } - } - - if (IIT_NULL != new_task_imp) { - assert(new_task->task_imp_base == new_task_imp); - ipc_importance_task_release(new_task_imp); - } -#endif /* IMPORTANCE_INHERITANCE */ - new_task->priority = BASEPRI_DEFAULT; new_task->max_priority = MAXPRI_USER; @@ -1182,9 +1299,15 @@ task_create_internal( new_task->sec_token = KERNEL_SECURITY_TOKEN; new_task->audit_token = KERNEL_AUDIT_TOKEN; #ifdef __LP64__ - if(is_64bit) - task_set_64BitAddr(new_task); + if(is_64bit) { + task_set_64Bit_addr(new_task); + } #endif + + if(is_64bit_data) { + task_set_64Bit_data(new_task); + } + new_task->all_image_info_addr = (mach_vm_address_t)0; new_task->all_image_info_size = (mach_vm_size_t)0; @@ -1208,7 +1331,8 @@ task_create_internal( assert(new_task->task_io_stats != NULL); bzero(new_task->task_io_stats, sizeof(struct io_stat_info)); - bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats)); + bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats)); + bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats)); bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics)); @@ -1220,6 +1344,7 @@ task_create_internal( new_task->total_user_time = 0; new_task->total_system_time = 0; new_task->total_ptime = 0; + new_task->total_runnable_time = 0; new_task->faults = 0; new_task->pageins = 0; new_task->cow_faults = 0; @@ -1292,20 +1417,33 @@ task_create_internal( new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset; } - if (vm_backing_store_low && parent_task != NULL) - new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV); - new_task->task_volatile_objects = 0; new_task->task_nonvolatile_objects = 0; new_task->task_purgeable_disowning = FALSE; new_task->task_purgeable_disowned = FALSE; - + queue_init(&new_task->task_objq); + task_objq_lock_init(new_task); + +#if __arm64__ + new_task->task_legacy_footprint = FALSE; +#endif /* __arm64__ */ + new_task->task_region_footprint = FALSE; + new_task->task_has_crossed_thread_limit = FALSE; + new_task->task_thread_limit = 0; #if CONFIG_SECLUDED_MEMORY new_task->task_can_use_secluded_mem = FALSE; new_task->task_could_use_secluded_mem = FALSE; new_task->task_could_also_use_secluded_mem = FALSE; + new_task->task_suppressed_secluded = FALSE; #endif /* CONFIG_SECLUDED_MEMORY */ + /* + * t_flags is set up above. But since we don't + * support darkwake mode being set that way + * currently, we clear it out here explicitly. 
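+	 * The only supported path into darkwake mode is an explicit
+	 * task_set_darkwake_mode() call on the live task.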
+ */ + new_task->t_flags &= ~(TF_DARKWAKE_MODE); + queue_init(&new_task->io_user_clients); ipc_task_enable(new_task); @@ -1336,6 +1474,7 @@ task_rollup_accounting_info(task_t to_task, task_t from_task) to_task->total_user_time = from_task->total_user_time; to_task->total_system_time = from_task->total_system_time; to_task->total_ptime = from_task->total_ptime; + to_task->total_runnable_time = from_task->total_runnable_time; to_task->faults = from_task->faults; to_task->pageins = from_task->pageins; to_task->cow_faults = from_task->cow_faults; @@ -1353,7 +1492,8 @@ task_rollup_accounting_info(task_t to_task, task_t from_task) to_task->purged_memory_critical = from_task->purged_memory_critical; to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener; *to_task->task_io_stats = *from_task->task_io_stats; - to_task->cpu_time_qos_stats = from_task->cpu_time_qos_stats; + to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats; + to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats; to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1; to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2; to_task->task_gpu_ns = from_task->task_gpu_ns; @@ -1392,7 +1532,7 @@ task_deallocate( task_t task) { ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups; - uint32_t refs; + os_ref_count_t refs; if (task == TASK_NULL) return; @@ -1400,31 +1540,24 @@ task_deallocate( refs = task_deallocate_internal(task); #if IMPORTANCE_INHERITANCE - if (refs > 1) - return; - - atomic_load_explicit(&task->ref_count, memory_order_acquire); - if (refs == 1) { /* * If last ref potentially comes from the task's importance, * disconnect it. But more task refs may be added before * that completes, so wait for the reference to go to zero - * naturually (it may happen on a recursive task_deallocate() + * naturally (it may happen on a recursive task_deallocate() * from the ipc_importance_disconnect_task() call). */ if (IIT_NULL != task->task_imp_base) ipc_importance_disconnect_task(task); return; } -#else - if (refs > 0) - return; - - atomic_load_explicit(&task->ref_count, memory_order_acquire); - #endif /* IMPORTANCE_INHERITANCE */ + if (refs > 0) { + return; + } + lck_mtx_lock(&tasks_threads_lock); queue_remove(&terminated_tasks, task, task_t, tasks); terminated_tasks_count--; @@ -1549,6 +1682,8 @@ task_deallocate( } #endif + assert(queue_empty(&task->task_objq)); + zfree(task_zone, task); } @@ -1949,7 +2084,7 @@ task_duplicate_map_and_threads( kern_return_t kr = KERN_SUCCESS; int active; thread_t thread, self, thread_return = THREAD_NULL; - thread_t new_thread = THREAD_NULL; + thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL; thread_t *thread_array; uint32_t active_thread_count = 0, array_count = 0, i; vm_map_t oldmap; @@ -1978,21 +2113,28 @@ task_duplicate_map_and_threads( * * Skip it. 
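+	 * (On DEVELOPMENT/DEBUG kernels, first tell memorystatus that the
+	 * vm_map_fork it approved is not going to happen, so it can unwind
+	 * any state it set up for it.)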
*/ +#if DEVELOPMENT || DEBUG + memorystatus_abort_vm_map_fork(task); +#endif task_resume_internal(task); return KERN_FAILURE; } /* Check with VM if vm_map_fork is allowed for this task */ - if (task_allowed_vm_map_fork(task)) { + if (memorystatus_allowed_vm_map_fork(task)) { /* Setup new task's vmmap, switch from parent task's map to it COW map */ oldmap = new_task->map; new_task->map = vm_map_fork(new_task->ledger, task->map, (VM_MAP_FORK_SHARE_IF_INHERIT_NONE | - VM_MAP_FORK_PRESERVE_PURGEABLE)); + VM_MAP_FORK_PRESERVE_PURGEABLE | + VM_MAP_FORK_CORPSE_FOOTPRINT)); vm_map_deallocate(oldmap); + /* copy ledgers that impact the memory footprint */ + vm_map_copy_footprint_ledgers(task, new_task); + /* Get all the udata pointers from kqueue */ est_knotes = kevent_proc_copy_uptrs(p, NULL, 0); if (est_knotes > 0) { @@ -2045,6 +2187,8 @@ task_duplicate_map_and_threads( if (thread_array[i] == self) { thread_return = new_thread; new_task->crashed_thread_id = thread_tid(new_thread); + } else if (first_thread == NULL) { + first_thread = new_thread; } else { /* drop the extra ref returned by thread_create_with_continuation */ thread_deallocate(new_thread); @@ -2060,9 +2204,19 @@ task_duplicate_map_and_threads( /* Copy thread name */ bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread); + new_thread->thread_tag = thread_array[i]->thread_tag; thread_copy_resource_info(new_thread, thread_array[i]); } + /* return the first thread if we couldn't find the equivalent of current */ + if (thread_return == THREAD_NULL) { + thread_return = first_thread; + } + else if (first_thread != THREAD_NULL) { + /* drop the extra ref returned by thread_create_with_continuation */ + thread_deallocate(first_thread); + } + task_resume_internal(task); for (i = 0; i < array_count; i++) { @@ -2087,16 +2241,6 @@ task_duplicate_map_and_threads( return kr; } -/* - * Place holder function to be filled by VM to return - * TRUE if vm_map_fork is allowed on the given task. 
- */ -boolean_t -task_allowed_vm_map_fork(task_t task __unused) -{ - return memorystatus_allowed_vm_map_fork(task); -} - #if CONFIG_SECLUDED_MEMORY extern void task_set_can_use_secluded_mem_locked( task_t task, @@ -2139,6 +2283,10 @@ task_terminate_internal( } task->task_could_use_secluded_mem = FALSE; task->task_could_also_use_secluded_mem = FALSE; + + if (task->task_suppressed_secluded) { + stop_secluded_suppression(task); + } #endif /* CONFIG_SECLUDED_MEMORY */ if (!task->active) { @@ -2267,21 +2415,6 @@ task_terminate_internal( vm_map_disable_hole_optimization(task->map); vm_map_unlock(task->map); - vm_map_remove(task->map, - task->map->min_offset, - task->map->max_offset, - /* - * Final cleanup: - * + no unnesting - * + remove immutable mappings - */ - (VM_MAP_REMOVE_NO_UNNESTING | - VM_MAP_REMOVE_IMMUTABLE)); - - /* release our shared region */ - vm_shared_region_set(task, NULL); - - #if MACH_ASSERT /* * Identify the pmap's process, in case the pmap ledgers drift @@ -2298,6 +2431,23 @@ task_terminate_internal( pmap_set_process(task->map->pmap, pid, procname); #endif /* MACH_ASSERT */ + vm_map_remove(task->map, + task->map->min_offset, + task->map->max_offset, + /* + * Final cleanup: + * + no unnesting + * + remove immutable mappings + * + allow gaps in range + */ + (VM_MAP_REMOVE_NO_UNNESTING | + VM_MAP_REMOVE_IMMUTABLE | + VM_MAP_REMOVE_GAPS_OK)); + + /* release our shared region */ + vm_shared_region_set(task, NULL); + + lck_mtx_lock(&tasks_threads_lock); queue_remove(&tasks, task, task_t, tasks); queue_enter(&terminated_tasks, task, task_t, tasks); @@ -2311,11 +2461,11 @@ task_terminate_internal( */ thread_interrupt_level(interrupt_save); -#if KPERF +#if KPC /* force the task to release all ctrs */ - if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS) + if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) kpc_force_all_ctrs(task, 0); -#endif +#endif /* KPC */ #if CONFIG_COALITIONS /* @@ -2481,9 +2631,11 @@ task_complete_halt(task_t task) * Final cleanup: * + no unnesting * + remove immutable mappings + * + allow gaps in the range */ (VM_MAP_REMOVE_NO_UNNESTING | - VM_MAP_REMOVE_IMMUTABLE)); + VM_MAP_REMOVE_IMMUTABLE | + VM_MAP_REMOVE_GAPS_OK)); /* * Kick out any IOKitUser handles to the task. At best they're stale, @@ -2499,7 +2651,7 @@ task_complete_halt(task_t task) * This is a recursive-style suspension of the task, a count of * suspends is maintained. * - * CONDITIONS: the task is locked and active. + * CONDITIONS: the task is locked and active. */ void task_hold_locked( @@ -2512,6 +2664,10 @@ task_hold_locked( if (task->suspend_count++ > 0) return; + if (task->bsd_info) { + workq_proc_suspended(task->bsd_info); + } + /* * Iterate through all the threads and hold them. 
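+	 * (Once held, a thread can run no further user-level code until
+	 * the matching task_release_locked() drops the hold.)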
*/ @@ -2626,6 +2782,10 @@ task_release_locked( if (--task->suspend_count > 0) return; + if (task->bsd_info) { + workq_proc_resumed(task->bsd_info); + } + queue_iterate(&task->threads, thread, thread_t, task_threads) { thread_mtx_lock(thread); thread_release(thread); @@ -3351,8 +3511,9 @@ task_freeze( uint32_t *clean_count, uint32_t *dirty_count, uint32_t dirty_budget, - boolean_t *shared, - boolean_t walk_only) + uint32_t *shared_count, + int *freezer_error_code, + boolean_t eval_only) { kern_return_t kr = KERN_SUCCESS; @@ -3377,22 +3538,29 @@ task_freeze( task_unlock(task); - if (walk_only) { - panic("task_freeze - walk_only == TRUE"); - } else { - kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared); - } + kr = vm_map_freeze(task->map, + purgeable_count, + wired_count, + clean_count, + dirty_count, + dirty_budget, + shared_count, + freezer_error_code, + eval_only); task_lock(task); - if (walk_only == FALSE && kr == KERN_SUCCESS) + if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) { task->frozen = TRUE; + } + task->changing_freeze_state = FALSE; thread_wakeup(&task->changing_freeze_state); task_unlock(task); - if (VM_CONFIG_COMPRESSOR_IS_PRESENT) { + if (VM_CONFIG_COMPRESSOR_IS_PRESENT && + (eval_only == FALSE)) { vm_wake_compactor_swapper(); /* * We do an explicit wakeup of the swapout thread here @@ -3480,22 +3648,13 @@ host_security_set_task_token( kern_return_t task_send_trace_memory( - task_t target_task, + __unused task_t target_task, __unused uint32_t pid, __unused uint64_t uniqueid) { - kern_return_t kr = KERN_INVALID_ARGUMENT; - if (target_task == TASK_NULL) - return (KERN_INVALID_ARGUMENT); - -#if CONFIG_ATM - kr = atm_send_proc_inspect_notification(target_task, - pid, - uniqueid); - -#endif - return (kr); + return KERN_INVALID_ARGUMENT; } + /* * This routine was added, pretty much exclusively, for registering the * RPC glue vector for in-kernel short circuited tasks. Rather than @@ -3758,7 +3917,7 @@ task_info( if (thread->options & TH_OPT_IDLE_THREAD) continue; - thread_read_times(thread, &user_time, &system_time); + thread_read_times(thread, &user_time, &system_time, NULL); time_value_add(×_info->user_time, &user_time); time_value_add(×_info->system_time, &system_time); @@ -3842,7 +4001,7 @@ task_info( /* only set format on output for those expecting it */ if (*task_info_count >= TASK_DYLD_INFO_COUNT) { - info->all_image_info_format = task_has_64BitAddr(task) ? + info->all_image_info_format = task_has_64Bit_addr(task) ? 
TASK_DYLD_ALL_IMAGE_INFO_64 : TASK_DYLD_ALL_IMAGE_INFO_32 ; *task_info_count = TASK_DYLD_INFO_COUNT; @@ -4310,7 +4469,7 @@ task_info( flags_info = (task_flags_info_t)task_info_out; /* only publish the 64-bit flag of the task */ - flags_info->flags = task->t_flags & TF_64B_ADDR; + flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA); *task_info_count = TASK_FLAGS_INFO_COUNT; break; @@ -4334,6 +4493,8 @@ task_info( if (task->itk_space){ dbg_info->ipc_space_size = task->itk_space->is_table_size; } + + dbg_info->suspend_count = task->suspend_count; error = KERN_SUCCESS; *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT; @@ -4546,6 +4707,52 @@ task_cpu_ptime( } +/* This function updates the cpu time in the arrays for each + * effective and requested QoS class + */ +void +task_update_cpu_time_qos_stats( + task_t task, + uint64_t *eqos_stats, + uint64_t *rqos_stats) +{ + if (!eqos_stats && !rqos_stats) { + return; + } + + task_lock(task); + thread_t thread; + queue_iterate(&task->threads, thread, thread_t, task_threads) { + if (thread->options & TH_OPT_IDLE_THREAD) { + continue; + } + + thread_update_qos_cpu_time(thread); + } + + if (eqos_stats) { + eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default; + eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance; + eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background; + eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility; + eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy; + eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; + eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; + } + + if (rqos_stats) { + rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default; + rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance; + rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background; + rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility; + rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy; + rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; + rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; + } + + task_unlock(task); +} + kern_return_t task_purgable_info( task_t task, @@ -4887,9 +5094,6 @@ PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND( return kr; } -extern kern_return_t -task_violated_guard(mach_exception_code_t, mach_exception_subcode_t, void *); - kern_return_t task_violated_guard( mach_exception_code_t code, @@ -4996,6 +5200,7 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, int pid = 0; const char *procname = "unknown"; mach_exception_data_type_t code[EXCEPTION_CODE_MAX]; + boolean_t send_sync_exc_resource = FALSE; #ifdef MACH_BSD pid = proc_selfpid(); @@ -5008,8 +5213,10 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, return; } - if (task->bsd_info != NULL) + if (task->bsd_info != NULL) { procname = proc_name_address(current_task()->bsd_info); + send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info); + } #endif #if CONFIG_COREDUMP if (hwm_user_cores) { @@ -5058,10 +5265,13 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, 
EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK); EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb); - /* Do not generate a corpse fork if the violation is a fatal one */ - if (is_fatal || exc_via_corpse_forking == 0) { - /* Do not send a EXC_RESOURCE is corpse_for_fatal_memkill is set */ - if (corpse_for_fatal_memkill == 0) { + /* + * Do not generate a corpse fork if the violation is a fatal one + * or the process wants synchronous EXC_RESOURCE exceptions. + */ + if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) { + /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */ + if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) { /* * Use the _internal_ variant so that no user-space * process can resume our task from under us. @@ -5071,8 +5281,13 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, task_resume_internal(task); } } else { - task_enqueue_exception_with_corpse(task, EXC_RESOURCE, + if (audio_active) { + printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE " + "supressed due to audio playback.\n", procname, pid, max_footprint_mb); + } else { + task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL); + } } /* @@ -5248,7 +5463,8 @@ task_set_phys_footprint_limit_internal( (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL); if (task == current_task()) { - ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint); + ledger_check_new_balance(current_thread(), task->ledger, + task_ledgers.phys_footprint); } task_unlock(task); @@ -5293,6 +5509,17 @@ task_get_phys_footprint_limit( } #endif /* CONFIG_MEMORYSTATUS */ +void +task_set_thread_limit(task_t task, uint16_t thread_limit) +{ + assert(task != kernel_task); + if (thread_limit <= TASK_MAX_THREAD_LIMIT) { + task_lock(task); + task->task_thread_limit = thread_limit; + task_unlock(task); + } +} + /* * We need to export some functions to other components that * are currently implemented in macros within the osfmk @@ -5776,15 +6003,18 @@ task_set_mach_voucher( kern_return_t task_swap_mach_voucher( - task_t task, - ipc_voucher_t new_voucher, - ipc_voucher_t *in_out_old_voucher) + __unused task_t task, + __unused ipc_voucher_t new_voucher, + ipc_voucher_t *in_out_old_voucher) { - if (TASK_NULL == task) - return KERN_INVALID_TASK; - - *in_out_old_voucher = new_voucher; - return KERN_SUCCESS; + /* + * Currently this function is only called from a MIG generated + * routine which doesn't release the reference on the voucher + * addressed by in_out_old_voucher. To avoid leaking this reference, + * a call to release it has been added here. 
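+	 * Swapping itself is no longer supported: the old voucher
+	 * reference is released and the routine now unconditionally
+	 * returns KERN_NOT_SUPPORTED.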
+ */ + ipc_voucher_release(*in_out_old_voucher); + return KERN_NOT_SUPPORTED; } void task_set_gpu_denied(task_t task, boolean_t denied) @@ -5961,7 +6191,7 @@ task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor, switch (flavor) { case TASK_INSPECT_BASIC_COUNTS: { struct task_inspect_basic_counts *bc; - uint64_t task_counts[MT_CORE_NFIXED]; + uint64_t task_counts[MT_CORE_NFIXED] = { 0 }; if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) { kr = KERN_INVALID_ARGUMENT; @@ -6050,7 +6280,8 @@ task_set_could_also_use_secluded_mem( boolean_t task_can_use_secluded_mem( - task_t task) + task_t task, + boolean_t is_alloc) { if (task->task_can_use_secluded_mem) { assert(task->task_could_use_secluded_mem); @@ -6062,6 +6293,20 @@ task_can_use_secluded_mem( assert(num_tasks_can_use_secluded_mem > 0); return TRUE; } + + /* + * If a single task is using more than some amount of + * memory, allow it to dip into secluded and also begin + * suppression of secluded memory until the tasks exits. + */ + if (is_alloc && secluded_shutoff_trigger != 0) { + uint64_t phys_used = get_task_phys_footprint(task); + if (phys_used > secluded_shutoff_trigger) { + start_secluded_suppression(task); + return TRUE; + } + } + return FALSE; } @@ -6084,3 +6329,70 @@ task_copy_fields_for_exec(task_t dst_task, task_t src_task) { dst_task->vtimers = src_task->vtimers; } + +#if DEVELOPMENT || DEBUG +int vm_region_footprint = 0; +#endif /* DEVELOPMENT || DEBUG */ + +boolean_t +task_self_region_footprint(void) +{ +#if DEVELOPMENT || DEBUG + if (vm_region_footprint) { + /* system-wide override */ + return TRUE; + } +#endif /* DEVELOPMENT || DEBUG */ + return current_task()->task_region_footprint; +} + +void +task_self_region_footprint_set( + boolean_t newval) +{ + task_t curtask; + + curtask = current_task(); + task_lock(curtask); + if (newval) { + curtask->task_region_footprint = TRUE; + } else { + curtask->task_region_footprint = FALSE; + } + task_unlock(curtask); +} + +void +task_set_darkwake_mode(task_t task, boolean_t set_mode) +{ + assert(task); + + task_lock(task); + + if (set_mode) { + task->t_flags |= TF_DARKWAKE_MODE; + } else { + task->t_flags &= ~(TF_DARKWAKE_MODE); + } + + task_unlock(task); +} + +boolean_t +task_get_darkwake_mode(task_t task) +{ + assert(task); + return ((task->t_flags & TF_DARKWAKE_MODE) != 0); +} + +#if __arm64__ +void +task_set_legacy_footprint( + task_t task, + boolean_t new_val) +{ + task_lock(task); + task->task_legacy_footprint = new_val; + task_unlock(task); +} +#endif /* __arm64__ */
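
The refcounting hunks above replace raw hw_atomic_add()/hw_atomic_sub() on
task->ref_count with the os_refcnt API: os_ref_init_count() in
task_create_internal(), os_ref_retain()/os_ref_release() in the
TASK_REFERENCE_LEAK_DEBUG wrappers, and the new static task_refgrp. A minimal
sketch of that pattern follows; only the os_ref_* calls and os_refgrp_decl()
are real API here, the widget type and functions are illustrative:

	#include <os/refcnt.h>

	/* Debug/telemetry group, as task.c now declares with
	 * os_refgrp_decl(static, task_refgrp, "task", NULL). */
	os_refgrp_decl(static, widget_refgrp, "widget", NULL);

	struct widget {
		struct os_refcnt w_refcnt;
		/* ... payload ... */
	};

	static void
	widget_init(struct widget *w)
	{
		/* Start at 2 -- one ref for being alive, one for the
		 * caller -- mirroring task_create_internal(). */
		os_ref_init_count(&w->w_refcnt, &widget_refgrp, 2);
	}

	static void
	widget_reference(struct widget *w)
	{
		os_ref_retain(&w->w_refcnt);
	}

	static void
	widget_deallocate(struct widget *w)
	{
		/* os_ref_release() returns the new count; 0 means this
		 * was the final reference, as in task_deallocate(). */
		if (os_ref_release(&w->w_refcnt) > 0) {
			return;
		}
		/* Last reference gone: safe to tear down and free. */
	}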
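The new task_set_thread_limit() only honors limits up to
TASK_MAX_THREAD_LIMIT (256) and asserts it is never applied to the kernel
task. A hedged sketch of a caller; policy_apply_thread_cap() and
BG_THREAD_CAP are hypothetical, and the "0 means no limit" reading is an
assumption based on task_thread_limit being initialized to 0 at creation:

	#define BG_THREAD_CAP 64	/* illustrative value, not from task.c */

	static void
	policy_apply_thread_cap(task_t task, boolean_t background)
	{
		/* Limits above TASK_MAX_THREAD_LIMIT are silently
		 * ignored; 0 presumably restores "no limit". */
		task_set_thread_limit(task, background ? BG_THREAD_CAP : 0);
	}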
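Finally, task_set_64bit() now splits "64-bit" into two independent bits: a
64-bit address space (TF_64B_ADDR) and 64-bit register state (TF_64B_DATA),
with task_get_64bit_data() as the new accessor. A sketch of the one
asymmetric combination this enables -- a 32-bit address space with 64-bit
registers, as on arm64_32-style targets; the surrounding exec path is
assumed, not shown in the diff:

	task_set_64bit(task, FALSE /* is_64bit */, TRUE /* is_64bit_data */);
	assert(!task_has_64Bit_addr(task));
	assert(task_get_64bit_data(task));	/* new accessor in this diff */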