X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/743345f9a4b36f7e2f9ba37691e70c50baecb56e..b226f5e54a60dc81db17b1260381d7dbfea3cdf1:/osfmk/kern/task.c diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index 4be2588be..792daf7bc 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,13 @@ #include #endif +#if MONOTONIC +#include +#include +#endif /* MONOTONIC */ + +#include + #include #include #include /* for kernel_map, ipc_kernel_map */ @@ -153,6 +161,7 @@ #include #include +#include #if CONFIG_ATM #include @@ -175,8 +184,8 @@ lck_grp_t task_lck_grp; lck_grp_attr_t task_lck_grp_attr; extern int exc_via_corpse_forking; -extern int unify_corpse_blob_alloc; extern int corpse_for_fatal_memkill; +extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p); /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */ int audio_active = 0; @@ -190,14 +199,39 @@ lck_spin_t dead_task_statistics_lock; ledger_template_t task_ledger_template = NULL; -struct _task_ledger_indices task_ledgers __attribute__((used)) = - {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - { 0 /* initialized at runtime */}, -#ifdef CONFIG_BANK - -1, -1, -#endif - -1, -1, - }; +SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) = +{.cpu_time = -1, + .tkm_private = -1, + .tkm_shared = -1, + .phys_mem = -1, + .wired_mem = -1, + .internal = -1, + .iokit_mapped = -1, + .alternate_accounting = -1, + .alternate_accounting_compressed = -1, + .page_table = -1, + .phys_footprint = -1, + .internal_compressed = -1, + .purgeable_volatile = -1, + .purgeable_nonvolatile = -1, + .purgeable_volatile_compressed = -1, + .purgeable_nonvolatile_compressed = -1, + .network_volatile = -1, + .network_nonvolatile = -1, + .network_volatile_compressed = -1, + .network_nonvolatile_compressed = -1, + .platform_idle_wakeups = -1, + .interrupt_wakeups = -1, +#if !CONFIG_EMBEDDED + .sfi_wait_times = { 0 /* initialized at runtime */}, +#endif /* !CONFIG_EMBEDDED */ + .cpu_time_billed_to_me = -1, + .cpu_time_billed_to_others = -1, + .physical_writes = -1, + .logical_writes = -1, + .energy_billed_to_me = -1, + .energy_billed_to_others = -1 +}; /* System sleep state */ boolean_t tasks_suspend_state; @@ -214,7 +248,6 @@ void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO kern_return_t task_suspend_internal(task_t); kern_return_t task_resume_internal(task_t); static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse); -int proc_list_uptrs(void *p, uint64_t *udata_buffer, int size); extern kern_return_t iokit_task_terminate(task_t task); @@ -259,8 +292,11 @@ int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated int64_t global_logical_writes_count = 0; /* Global count for logical writes */ static boolean_t global_update_logical_writes(int64_t); +#define TASK_MAX_THREAD_LIMIT 256 + #if MACH_ASSERT int pmap_ledgers_panic = 1; +int pmap_ledgers_panic_leeway = 3; #endif /* MACH_ASSERT */ int task_max = CONFIG_TASK_MAX; /* Max number of tasks */ @@ -273,17 +309,38 @@ int hwm_user_cores = 0; /* high watermark violations generate user core files */ extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long); extern int proc_pid(struct proc *p); extern int proc_selfpid(void); +extern struct proc *current_proc(void); extern char *proc_name_address(struct proc *p); extern 
uint64_t get_dispatchqueue_offset_from_proc(void *); +extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize); +extern void workq_proc_suspended(struct proc *p); +extern void workq_proc_resumed(struct proc *p); #if CONFIG_MEMORYSTATUS extern void proc_memstat_terminated(struct proc* p, boolean_t set); -extern boolean_t memorystatus_turnoff_exception_and_get_fatalness(boolean_t warning, const int max_footprint_mb); -extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t is_fatal); +extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal); +extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal); +extern boolean_t memorystatus_allowed_vm_map_fork(task_t task); + +#if DEVELOPMENT || DEBUG +extern void memorystatus_abort_vm_map_fork(task_t); +#endif + #endif /* CONFIG_MEMORYSTATUS */ #endif /* MACH_BSD */ +#if DEVELOPMENT || DEBUG +int exc_resource_threads_enabled; +#endif /* DEVELOPMENT || DEBUG */ + +#if (DEVELOPMENT || DEBUG) && TASK_EXC_GUARD_DELIVER_CORPSE +uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE | + TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE; +#else +uint32_t task_exc_guard_default = 0; +#endif + /* Forwards */ static void task_hold_locked(task_t task); @@ -292,21 +349,12 @@ static void task_release_locked(task_t task); static void task_synchronizer_destroy_all(task_t task); -void -task_backing_store_privileged( - task_t task) -{ - task_lock(task); - task->priv_flags |= VM_BACKING_STORE_PRIV; - task_unlock(task); - return; -} - void task_set_64bit( task_t task, - boolean_t is64bit) + boolean_t is_64bit, + boolean_t is_64bit_data) { #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) thread_t thread; @@ -314,15 +362,34 @@ task_set_64bit( task_lock(task); - if (is64bit) { - if (task_has_64BitAddr(task)) + /* + * Switching to/from 64-bit address spaces + */ + if (is_64bit) { + if (!task_has_64Bit_addr(task)) { + task_set_64Bit_addr(task); + } + } else { + if (task_has_64Bit_addr(task)) { + task_clear_64Bit_addr(task); + } + } + + /* + * Switching to/from 64-bit register state. + */ + if (is_64bit_data) { + if (task_has_64Bit_data(task)) goto out; - task_set_64BitAddr(task); + + task_set_64Bit_data(task); } else { - if ( !task_has_64BitAddr(task)) + if ( !task_has_64Bit_data(task)) goto out; - task_clear_64BitAddr(task); + + task_clear_64Bit_data(task); } + /* FIXME: On x86, the thread save state flavor can diverge from the * task's 64-bit feature flag due to the 32-bit/64-bit register save * state dichotomy. Since we can be pre-empted in this interval, @@ -330,12 +397,14 @@ task_set_64bit( * state with respect to its task's 64-bitness. */ -#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) +#if defined(__x86_64__) || defined(__arm64__) queue_iterate(&task->threads, thread, thread_t, task_threads) { thread_mtx_lock(thread); machine_thread_switch_addrmode(thread); thread_mtx_unlock(thread); +#if defined(__arm64__) + /* specifically, if running on H9 */ if (thread == current_thread()) { uint64_t arg1, arg2; int urgency; @@ -348,28 +417,77 @@ task_set_64bit( * * This is needed for bring-up, a different callback should be used * in the future. 
+ * + * TODO: Remove this callout when we no longer support 32-bit code on H9 */ thread_lock(thread); urgency = thread_get_urgency(thread, &arg1, &arg2); - machine_thread_going_on_core(thread, urgency, 0); + machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time()); thread_unlock(thread); splx(spl); } +#endif /* defined(__arm64__) */ } -#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */ +#endif /* defined(__x86_64__) || defined(__arm64__) */ out: task_unlock(task); } +boolean_t +task_get_64bit_data(task_t task) +{ + return task_has_64Bit_data(task); +} void -task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size) +task_set_platform_binary( + task_t task, + boolean_t is_platform) { task_lock(task); - task->all_image_info_addr = addr; - task->all_image_info_size = size; + if (is_platform) { + task->t_flags |= TF_PLATFORM; + } else { + task->t_flags &= ~(TF_PLATFORM); + } + task_unlock(task); +} + +/* + * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument. + * Returns "false" if flag is already set, and "true" in other cases. + */ +bool +task_set_ca_client_wi( + task_t task, + boolean_t set_or_clear) +{ + bool ret = true; + task_lock(task); + if (set_or_clear) { + /* Tasks can have only one CA_CLIENT work interval */ + if (task->t_flags & TF_CA_CLIENT_WI) + ret = false; + else + task->t_flags |= TF_CA_CLIENT_WI; + } else { + task->t_flags &= ~TF_CA_CLIENT_WI; + } task_unlock(task); + return ret; +} + +void +task_set_dyld_info( + task_t task, + mach_vm_address_t addr, + mach_vm_size_t size) +{ + task_lock(task); + task->all_image_info_addr = addr; + task->all_image_info_size = size; + task_unlock(task); } void @@ -387,12 +505,9 @@ task_atm_reset(__unused task_t task) { void task_bank_reset(__unused task_t task) { -#if CONFIG_BANK if (task->bank_context != NULL) { bank_task_destroy(task); } -#endif - } /* @@ -403,13 +518,10 @@ task_bank_reset(__unused task_t task) { void task_bank_init(__unused task_t task) { -#if CONFIG_BANK if (task->bank_context != NULL) { panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context); } bank_task_initialize(task); -#endif - } void @@ -451,7 +563,7 @@ task_clear_return_wait(task_t task) task_unlock(task); } -void +void __attribute__((noreturn)) task_wait_to_return(void) { task_t task; @@ -473,9 +585,36 @@ task_wait_to_return(void) task_unlock(task); +#if CONFIG_MACF + /* + * Before jumping to userspace and allowing this process to execute any code, + * notify any interested parties. 
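+	 *
+	 * (Editor's note, not in the original source: a MACF policy would
+	 * typically observe this event through a proc_notify_exec_complete
+	 * entry point in its mac_policy_ops; that hook name is an assumption
+	 * based on the mac_proc_* naming convention of the call below.)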
+ */ + mac_proc_notify_exec_complete(current_proc()); +#endif + thread_bootstrap_return(); } +#ifdef CONFIG_32BIT_TELEMETRY +boolean_t +task_consume_32bit_log_flag(task_t task) +{ + if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) { + task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY; + return TRUE; + } else { + return FALSE; + } +} + +void +task_set_32bit_log_flag(task_t task) +{ + task->t_procflags |= TPF_LOG_32BIT_TELEMETRY; +} +#endif /* CONFIG_32BIT_TELEMETRY */ + boolean_t task_is_exec_copy(task_t task) { @@ -494,6 +633,12 @@ task_is_active(task_t task) return task->active; } +boolean_t +task_is_halting(task_t task) +{ + return task->halting; +} + #if TASK_REFERENCE_LEAK_DEBUG #include @@ -510,24 +655,24 @@ task_reference_internal(task_t task) void * bt[TASK_REF_BTDEPTH]; int numsaved = 0; + os_ref_retain(&task->ref_count); + numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH); - - (void)hw_atomic_add(&(task)->ref_count, 1); btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR, bt, numsaved); } -uint32_t +os_ref_count_t task_deallocate_internal(task_t task) { void * bt[TASK_REF_BTDEPTH]; int numsaved = 0; numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH); - btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR, bt, numsaved); - return hw_atomic_sub(&(task)->ref_count, 1); + + return os_ref_release(&task->ref_count); } #endif /* TASK_REFERENCE_LEAK_DEBUG */ @@ -550,6 +695,9 @@ task_init(void) zone_change(task_zone, Z_NOENCRYPT, TRUE); +#if CONFIG_EMBEDDED + task_watch_init(); +#endif /* CONFIG_EMBEDDED */ /* * Configure per-task memory limit. @@ -621,10 +769,16 @@ task_init(void) #endif /* CONFIG_MEMORYSTATUS */ } -#if MACH_ASSERT - PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic, - sizeof (pmap_ledgers_panic)); -#endif /* MACH_ASSERT */ +#if DEVELOPMENT || DEBUG + if (!PE_parse_boot_argn("exc_resource_threads", + &exc_resource_threads_enabled, + sizeof(exc_resource_threads_enabled))) { + exc_resource_threads_enabled = 1; + } + PE_parse_boot_argn("task_exc_guard_default", + &task_exc_guard_default, + sizeof(task_exc_guard_default)); +#endif /* DEVELOPMENT || DEBUG */ #if CONFIG_COREDUMP if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores, @@ -685,12 +839,13 @@ task_init(void) * Create the kernel task as the first task. 
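 *
 * (Editor's note: with the new is_64bit_data argument visible below, an
 * LP64 kernel now creates the kernel task as 64-bit in both address
 * space and register state, i.e.
 *
 *	task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE,
 *	    TF_NONE, TPF_NONE, &kernel_task);
 *
 * while a 32-bit kernel passes FALSE for both flags.)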
*/ #ifdef __LP64__ - if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) #else - if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) #endif panic("task_init\n"); + vm_map_deallocate(kernel_task->map); kernel_task->map = kernel_map; lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr); @@ -792,8 +947,12 @@ init_task_ledgers(void) assert(kernel_task == TASK_NULL); #if MACH_ASSERT - PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic, + PE_parse_boot_argn("pmap_ledgers_panic", + &pmap_ledgers_panic, sizeof (pmap_ledgers_panic)); + PE_parse_boot_argn("pmap_ledgers_panic_leeway", + &pmap_ledgers_panic_leeway, + sizeof (pmap_ledgers_panic_leeway)); #endif /* MACH_ASSERT */ if ((t = ledger_template_create("Per-task ledger")) == NULL) @@ -826,6 +985,12 @@ init_task_ledgers(void) task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes"); task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes"); task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes"); + + task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes"); + task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes"); + task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes"); + task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes"); + task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power", "count"); task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power", @@ -860,12 +1025,12 @@ init_task_ledgers(void) assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1); #endif /* CONFIG_SCHED_SFI */ -#ifdef CONFIG_BANK task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns"); task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns"); -#endif task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes"); task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes"); + task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj"); + task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj"); if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) || @@ -883,18 +1048,23 @@ init_task_ledgers(void) (task_ledgers.purgeable_nonvolatile < 0) || (task_ledgers.purgeable_volatile_compressed < 0) || (task_ledgers.purgeable_nonvolatile_compressed < 0) || + (task_ledgers.network_volatile < 0) || + (task_ledgers.network_nonvolatile < 0) || + (task_ledgers.network_volatile_compressed < 0) || + (task_ledgers.network_nonvolatile_compressed < 0) || (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0) || -#ifdef CONFIG_BANK (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) || -#endif (task_ledgers.physical_writes < 
0) || - (task_ledgers.logical_writes < 0) + (task_ledgers.logical_writes < 0) || + (task_ledgers.energy_billed_to_me < 0) || + (task_ledgers.energy_billed_to_others < 0) ) { panic("couldn't create entries for task ledger template"); } ledger_track_credit_only(t, task_ledgers.phys_footprint); + ledger_track_credit_only(t, task_ledgers.page_table); ledger_track_credit_only(t, task_ledgers.internal); ledger_track_credit_only(t, task_ledgers.internal_compressed); ledger_track_credit_only(t, task_ledgers.iokit_mapped); @@ -905,6 +1075,11 @@ init_task_ledgers(void) ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed); ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed); + ledger_track_credit_only(t, task_ledgers.network_volatile); + ledger_track_credit_only(t, task_ledgers.network_nonvolatile); + ledger_track_credit_only(t, task_ledgers.network_volatile_compressed); + ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed); + ledger_track_maximum(t, task_ledgers.phys_footprint, 60); #if MACH_ASSERT if (pmap_ledgers_panic) { @@ -919,6 +1094,11 @@ init_task_ledgers(void) ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile); ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed); ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed); + + ledger_panic_on_negative(t, task_ledgers.network_volatile); + ledger_panic_on_negative(t, task_ledgers.network_nonvolatile); + ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed); + ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed); } #endif /* MACH_ASSERT */ @@ -929,16 +1109,21 @@ init_task_ledgers(void) ledger_set_callback(t, task_ledgers.interrupt_wakeups, task_wakeups_rate_exceeded, NULL, NULL); ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL); - ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL); + ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL); + + ledger_template_complete(t); task_ledger_template = t; } +os_refgrp_decl(static, task_refgrp, "task", NULL); + kern_return_t task_create_internal( task_t parent_task, coalition_t *parent_coalitions __unused, boolean_t inherit_memory, - boolean_t is_64bit, + __unused boolean_t is_64bit, + boolean_t is_64bit_data, uint32_t t_flags, uint32_t t_procflags, task_t *child_task) /* OUT */ @@ -953,7 +1138,7 @@ task_create_internal( return(KERN_RESOURCE_SHORTAGE); /* one ref for just being alive; one for our caller */ - new_task->ref_count = 2; + os_ref_init_count(&new_task->ref_count, &task_refgrp, 2); /* allocate with active entries */ assert(task_ledger_template != NULL); @@ -963,6 +1148,7 @@ task_create_internal( return(KERN_RESOURCE_SHORTAGE); } + new_task->ledger = ledger; #if defined(CONFIG_SCHED_MULTIQ) @@ -990,20 +1176,19 @@ task_create_internal( new_task->legacy_stop_count = 0; new_task->active = TRUE; new_task->halting = FALSE; - new_task->user_data = NULL; new_task->priv_flags = 0; new_task->t_flags = t_flags; new_task->t_procflags = t_procflags; new_task->importance = 0; - new_task->corpse_info_kernel = NULL; + new_task->crashed_thread_id = 0; new_task->exec_token = 0; + new_task->task_exc_guard = task_exc_guard_default; + #if CONFIG_ATM new_task->atm_context = NULL; #endif -#if CONFIG_BANK new_task->bank_context = NULL; -#endif #ifdef MACH_BSD new_task->bsd_info = 
NULL; @@ -1031,9 +1216,7 @@ task_create_internal( task_io_monitor_ctl(new_task, &flags); #endif /* CONFIG_IO_ACCOUNTING */ -#if defined(__i386__) || defined(__x86_64__) - new_task->i386_ldt = 0; -#endif + machine_task_init(new_task, parent_task, inherit_memory); new_task->task_debug = NULL; @@ -1052,6 +1235,8 @@ task_create_internal( new_task->affinity_space = NULL; + new_task->t_kpc = 0; + new_task->pidsuspended = FALSE; new_task->frozen = FALSE; new_task->changing_freeze_state = FALSE; @@ -1068,19 +1253,20 @@ task_create_internal( new_task->hv_task_target = NULL; #endif /* HYPERVISOR */ +#if CONFIG_EMBEDDED + queue_init(&new_task->task_watchers); + new_task->num_taskwatchers = 0; + new_task->watchapplying = 0; +#endif /* CONFIG_EMBEDDED */ new_task->mem_notify_reserved = 0; -#if IMPORTANCE_INHERITANCE - new_task->task_imp_base = NULL; -#endif /* IMPORTANCE_INHERITANCE */ - -#if defined(__x86_64__) - new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0; -#endif + new_task->memlimit_attrs_reserved = 0; new_task->requested_policy = default_task_requested_policy; new_task->effective_policy = default_task_effective_policy; + task_importance_init_from_parent(new_task, parent_task); + if (parent_task != TASK_NULL) { new_task->sec_token = parent_task->sec_token; new_task->audit_token = parent_task->audit_token; @@ -1089,51 +1275,22 @@ task_create_internal( shared_region = vm_shared_region_get(parent_task); vm_shared_region_set(new_task, shared_region); - if(task_has_64BitAddr(parent_task)) - task_set_64BitAddr(new_task); + if(task_has_64Bit_addr(parent_task)) { + task_set_64Bit_addr(new_task); + } + + if(task_has_64Bit_data(parent_task)) { + task_set_64Bit_data(new_task); + } + new_task->all_image_info_addr = parent_task->all_image_info_addr; new_task->all_image_info_size = parent_task->all_image_info_size; -#if defined(__i386__) || defined(__x86_64__) - if (inherit_memory && parent_task->i386_ldt) - new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt); -#endif if (inherit_memory && parent_task->affinity_space) task_affinity_create(parent_task, new_task); new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task); -#if IMPORTANCE_INHERITANCE - ipc_importance_task_t new_task_imp = IIT_NULL; - boolean_t inherit_receive = TRUE; - - if (task_is_marked_importance_donor(parent_task)) { - new_task_imp = ipc_importance_for_task(new_task, FALSE); - assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_donor(new_task_imp, TRUE); - } - - if (inherit_receive) { - if (task_is_marked_importance_receiver(parent_task)) { - if (IIT_NULL == new_task_imp) - new_task_imp = ipc_importance_for_task(new_task, FALSE); - assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_receiver(new_task_imp, TRUE); - } - if (task_is_marked_importance_denap_receiver(parent_task)) { - if (IIT_NULL == new_task_imp) - new_task_imp = ipc_importance_for_task(new_task, FALSE); - assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE); - } - } - - if (IIT_NULL != new_task_imp) { - assert(new_task->task_imp_base == new_task_imp); - ipc_importance_task_release(new_task_imp); - } -#endif /* IMPORTANCE_INHERITANCE */ - new_task->priority = BASEPRI_DEFAULT; new_task->max_priority = MAXPRI_USER; @@ -1142,9 +1299,15 @@ task_create_internal( new_task->sec_token = KERNEL_SECURITY_TOKEN; new_task->audit_token = KERNEL_AUDIT_TOKEN; #ifdef __LP64__ - if(is_64bit) - task_set_64BitAddr(new_task); + if(is_64bit) { + task_set_64Bit_addr(new_task); + 
} #endif + + if(is_64bit_data) { + task_set_64Bit_data(new_task); + } + new_task->all_image_info_addr = (mach_vm_address_t)0; new_task->all_image_info_size = (mach_vm_size_t)0; @@ -1168,7 +1331,8 @@ task_create_internal( assert(new_task->task_io_stats != NULL); bzero(new_task->task_io_stats, sizeof(struct io_stat_info)); - bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats)); + bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats)); + bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats)); bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics)); @@ -1179,6 +1343,8 @@ task_create_internal( /* Initialize to zero for standard fork/spawn case */ new_task->total_user_time = 0; new_task->total_system_time = 0; + new_task->total_ptime = 0; + new_task->total_runnable_time = 0; new_task->faults = 0; new_task->pageins = 0; new_task->cow_faults = 0; @@ -1194,6 +1360,10 @@ task_create_internal( new_task->purged_memory_warn = 0; new_task->purged_memory_critical = 0; new_task->low_mem_privileged_listener = 0; + new_task->memlimit_is_active = 0; + new_task->memlimit_is_fatal = 0; + new_task->memlimit_active_exc_resource = 0; + new_task->memlimit_inactive_exc_resource = 0; new_task->task_timer_wakeups_bin_1 = 0; new_task->task_timer_wakeups_bin_2 = 0; new_task->task_gpu_ns = 0; @@ -1202,6 +1372,9 @@ task_create_internal( new_task->task_invalidated_writes = 0; new_task->task_metadata_writes = 0; new_task->task_energy = 0; +#if MONOTONIC + memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic)); +#endif /* MONOTONIC */ } @@ -1221,6 +1394,15 @@ task_create_internal( /* TODO: assert that new_task will be PID 1 (launchd) */ coalitions_adopt_init_task(new_task); } + /* + * on exec, we need to transfer the coalition roles from the + * parent task to the exec copy task. + */ + if (parent_task && (t_procflags & TPF_EXEC_COPY)) { + int coal_roles[COALITION_NUM_TYPES]; + task_coalition_roles(parent_task, coal_roles); + (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles); + } } else { coalitions_adopt_corpse_task(new_task); } @@ -1235,20 +1417,33 @@ task_create_internal( new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset; } - if (vm_backing_store_low && parent_task != NULL) - new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV); - new_task->task_volatile_objects = 0; new_task->task_nonvolatile_objects = 0; new_task->task_purgeable_disowning = FALSE; new_task->task_purgeable_disowned = FALSE; - + queue_init(&new_task->task_objq); + task_objq_lock_init(new_task); + +#if __arm64__ + new_task->task_legacy_footprint = FALSE; +#endif /* __arm64__ */ + new_task->task_region_footprint = FALSE; + new_task->task_has_crossed_thread_limit = FALSE; + new_task->task_thread_limit = 0; #if CONFIG_SECLUDED_MEMORY new_task->task_can_use_secluded_mem = FALSE; new_task->task_could_use_secluded_mem = FALSE; new_task->task_could_also_use_secluded_mem = FALSE; + new_task->task_suppressed_secluded = FALSE; #endif /* CONFIG_SECLUDED_MEMORY */ + /* + * t_flags is set up above. But since we don't + * support darkwake mode being set that way + * currently, we clear it out here explicitly. 
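+	 * (Editor's note: the effect is that TF_DARKWAKE_MODE can never be
+	 * inherited through the t_flags assignment during task creation;
+	 * presumably it is established later through a dedicated interface,
+	 * which this hunk does not show.)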
+ */ + new_task->t_flags &= ~(TF_DARKWAKE_MODE); + queue_init(&new_task->io_user_clients); ipc_task_enable(new_task); @@ -1278,6 +1473,8 @@ task_rollup_accounting_info(task_t to_task, task_t from_task) to_task->total_user_time = from_task->total_user_time; to_task->total_system_time = from_task->total_system_time; + to_task->total_ptime = from_task->total_ptime; + to_task->total_runnable_time = from_task->total_runnable_time; to_task->faults = from_task->faults; to_task->pageins = from_task->pageins; to_task->cow_faults = from_task->cow_faults; @@ -1295,7 +1492,8 @@ task_rollup_accounting_info(task_t to_task, task_t from_task) to_task->purged_memory_critical = from_task->purged_memory_critical; to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener; *to_task->task_io_stats = *from_task->task_io_stats; - to_task->cpu_time_qos_stats = from_task->cpu_time_qos_stats; + to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats; + to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats; to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1; to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2; to_task->task_gpu_ns = from_task->task_gpu_ns; @@ -1314,12 +1512,12 @@ task_rollup_accounting_info(task_t to_task, task_t from_task) ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]); } #endif -#if CONFIG_BANK ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me); ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others); -#endif ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes); ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes); + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me); + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others); } int task_dropped_imp_count = 0; @@ -1334,7 +1532,7 @@ task_deallocate( task_t task) { ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups; - uint32_t refs; + os_ref_count_t refs; if (task == TASK_NULL) return; @@ -1342,26 +1540,24 @@ task_deallocate( refs = task_deallocate_internal(task); #if IMPORTANCE_INHERITANCE - if (refs > 1) - return; - if (refs == 1) { /* * If last ref potentially comes from the task's importance, * disconnect it. But more task refs may be added before * that completes, so wait for the reference to go to zero - * naturually (it may happen on a recursive task_deallocate() + * naturally (it may happen on a recursive task_deallocate() * from the ipc_importance_disconnect_task() call). 
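 *
 * (Editor's note: concretely, refs == 1 here can mean that the only
 * remaining reference is the one held via task->task_imp_base; the
 * ipc_importance_disconnect_task() call below drops it, which may
 * re-enter task_deallocate() with refs == 0 and perform the actual
 * teardown.)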
*/ if (IIT_NULL != task->task_imp_base) ipc_importance_disconnect_task(task); return; } -#else - if (refs > 0) - return; #endif /* IMPORTANCE_INHERITANCE */ + if (refs > 0) { + return; + } + lck_mtx_lock(&tasks_threads_lock); queue_remove(&terminated_tasks, task, task_t, tasks); terminated_tasks_count--; @@ -1437,6 +1633,10 @@ task_deallocate( dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1; dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2; + dead_task_statistics.total_ptime += task->total_ptime; + dead_task_statistics.total_pset_switches += task->ps_switch; + dead_task_statistics.task_gpu_ns += task->task_gpu_ns; + dead_task_statistics.task_energy += task->task_energy; lck_spin_unlock(&dead_task_statistics_lock); lck_mtx_destroy(&task->lock, &task_lck_grp); @@ -1466,20 +1666,24 @@ task_deallocate( #if MACH_BSD /* clean up collected information since last reference to task is gone */ if (task->corpse_info) { - task_crashinfo_destroy(task->corpse_info, RELEASE_CORPSE_REF); + void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info); + task_crashinfo_destroy(task->corpse_info); task->corpse_info = NULL; + if (corpse_info_kernel) { + kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE); + } } #endif - if (task->corpse_info_kernel) { - kfree(task->corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE); - } #if CONFIG_MACF if (task->crash_label) { - mac_exc_action_label_task_destroy(task); + mac_exc_free_label(task->crash_label); + task->crash_label = NULL; } #endif + assert(queue_empty(&task->task_objq)); + zfree(task_zone, task); } @@ -1495,6 +1699,18 @@ task_name_deallocate( return(task_deallocate((task_t)task_name)); } +/* + * task_inspect_deallocate: + * + * Drop a task inspection reference. + */ +void +task_inspect_deallocate( + task_inspect_t task_inspect) +{ + return(task_deallocate((task_t)task_inspect)); +} + /* * task_suspension_token_deallocate: * @@ -1514,7 +1730,12 @@ task_suspension_token_deallocate( * collect crash info from bsd and mach based data */ kern_return_t -task_collect_crash_info(task_t task, struct proc *proc, int is_corpse_fork) +task_collect_crash_info( + task_t task, +#ifdef CONFIG_MACF + struct label *crash_label, +#endif + int is_corpse_fork) { kern_return_t kr = KERN_SUCCESS; @@ -1524,60 +1745,57 @@ task_collect_crash_info(task_t task, struct proc *proc, int is_corpse_fork) mach_vm_offset_t crash_data_ptr = 0; void *crash_data_kernel = NULL; void *crash_data_kernel_release = NULL; - int corpse_blob_kernel_alloc = (is_corpse_fork || unify_corpse_blob_alloc); +#if CONFIG_MACF + struct label *label, *free_label; +#endif if (!corpses_enabled()) { return KERN_NOT_SUPPORTED; } +#if CONFIG_MACF + free_label = label = mac_exc_create_label(); +#endif + task_lock(task); assert(is_corpse_fork || task->bsd_info != NULL); if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) { #if CONFIG_MACF - /* Update the corpse label, used by the exception delivery mac hook */ - mac_exc_action_label_task_update(task, proc); + /* Set the crash label, used by the exception delivery mac hook */ + free_label = task->crash_label; // Most likely NULL. 
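+		/*
+		 * (Editor's note: the previous label is stashed in free_label
+		 * rather than freed here, so that mac_exc_free_label() can run
+		 * at out_no_lock, after the task lock has been dropped.)
+		 */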
+ task->crash_label = label; + mac_exc_update_task_crash_label(task, crash_label); #endif task_unlock(task); - if (!corpse_blob_kernel_alloc) { - /* map crash data memory in task's vm map */ - kr = mach_vm_allocate(task->map, &crash_data_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE)); - } else { - crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE); - if (crash_data_kernel == 0) - kr = KERN_RESOURCE_SHORTAGE; - bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE); - crash_data_ptr = (mach_vm_offset_t) crash_data_kernel; - } - if (kr != KERN_SUCCESS) + crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE); + if (crash_data_kernel == NULL) { + kr = KERN_RESOURCE_SHORTAGE; goto out_no_lock; + } + bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE); + crash_data_ptr = (mach_vm_offset_t) crash_data_kernel; /* Do not get a corpse ref for corpse fork */ - crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size, is_corpse_fork ? !GET_CORPSE_REF : GET_CORPSE_REF, corpse_blob_kernel_alloc ? KCFLAG_USE_MEMCOPY: KCFLAG_USE_COPYOUT); + crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size, + is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF, + KCFLAG_USE_MEMCOPY); if (crash_data) { task_lock(task); crash_data_release = task->corpse_info; - crash_data_kernel_release = task->corpse_info_kernel; + crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release); task->corpse_info = crash_data; - task->corpse_info_kernel = crash_data_kernel; task_unlock(task); kr = KERN_SUCCESS; } else { - /* if failed to create corpse info, free the mapping */ - if (!corpse_blob_kernel_alloc) { - if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_ptr, size)) { - printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task)); - } - } else { - kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE); - } + kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE); kr = KERN_FAILURE; } if (crash_data_release != NULL) { - task_crashinfo_destroy(crash_data_release, is_corpse_fork ? !RELEASE_CORPSE_REF : RELEASE_CORPSE_REF); + task_crashinfo_destroy(crash_data_release); } if (crash_data_kernel_release != NULL) { kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE); @@ -1587,6 +1805,11 @@ task_collect_crash_info(task_t task, struct proc *proc, int is_corpse_fork) } out_no_lock: +#if CONFIG_MACF + if (free_label != NULL) { + mac_exc_free_label(free_label); + } +#endif return kr; } @@ -1596,7 +1819,11 @@ out_no_lock: * Makes outcall to registered host port for a corpse. 
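 *
 * (Editor's note: as the body below shows, the exception codes are
 * chosen per corpse type: a corpse fork reports the original etype
 * (EXC_RESOURCE or EXC_GUARD) with its subcode, while a full corpse
 * reports EXC_CRASH and carries the subcode only when
 * corpse_for_fatal_memkill is set.)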
*/ kern_return_t -task_deliver_crash_notification(task_t task, thread_t thread, mach_exception_data_type_t subcode) +task_deliver_crash_notification( + task_t task, + thread_t thread, + exception_type_t etype, + mach_exception_subcode_t subcode) { kcdata_descriptor_t crash_info = task->corpse_info; thread_t th_iter = NULL; @@ -1610,10 +1837,10 @@ task_deliver_crash_notification(task_t task, thread_t thread, mach_exception_dat task_lock(task); if (task_is_a_corpse_fork(task)) { - /* Populate code with EXC_RESOURCE for corpse fork */ - code[0] = EXC_RESOURCE; + /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */ + code[0] = etype; code[1] = subcode; - } else if (unify_corpse_blob_alloc) { + } else { /* Populate code with EXC_CRASH for corpses */ code[0] = EXC_CRASH; code[1] = 0; @@ -1621,11 +1848,8 @@ task_deliver_crash_notification(task_t task, thread_t thread, mach_exception_dat if (corpse_for_fatal_memkill) { code[1] = subcode; } - } else { - /* Populate code with address and length for EXC_CRASH */ - code[0] = crash_info->kcd_addr_begin; - code[1] = crash_info->kcd_length; } + queue_iterate(&task->threads, th_iter, thread_t, task_threads) { if (th_iter->corpse_dup == FALSE) { @@ -1710,14 +1934,25 @@ task_mark_corpse(task_t task) thread_t self_thread; (void) self_thread; wait_interrupt_t wsave; +#if CONFIG_MACF + struct label *crash_label = NULL; +#endif assert(task != kernel_task); assert(task == current_task()); assert(!task_is_a_corpse(task)); - kr = task_collect_crash_info(task, (struct proc*)task->bsd_info, FALSE); +#if CONFIG_MACF + crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info); +#endif + + kr = task_collect_crash_info(task, +#if CONFIG_MACF + crash_label, +#endif + FALSE); if (kr != KERN_SUCCESS) { - return kr; + goto out; } self_thread = current_thread(); @@ -1727,6 +1962,7 @@ task_mark_corpse(task_t task) task_set_corpse_pending_report(task); task_set_corpse(task); + task->crashed_thread_id = thread_tid(self_thread); kr = task_start_halt_locked(task, TRUE); assert(kr == KERN_SUCCESS); @@ -1748,6 +1984,11 @@ task_mark_corpse(task_t task) (void) thread_interrupt_level(wsave); assert(task->halting == TRUE); + +out: +#if CONFIG_MACF + mac_exc_free_label(crash_label); +#endif return kr; } @@ -1836,7 +2077,6 @@ task_duplicate_map_and_threads( void *p, task_t new_task, thread_t *thread_ret, - int is64bit, uint64_t **udata_buffer, int *size, int *num_udata) @@ -1844,7 +2084,7 @@ task_duplicate_map_and_threads( kern_return_t kr = KERN_SUCCESS; int active; thread_t thread, self, thread_return = THREAD_NULL; - thread_t new_thread = THREAD_NULL; + thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL; thread_t *thread_array; uint32_t active_thread_count = 0, array_count = 0, i; vm_map_t oldmap; @@ -1873,32 +2113,37 @@ task_duplicate_map_and_threads( * * Skip it. 
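 *
 * (Editor's note: "skip" here means the corpse collection fails: on
 * DEVELOPMENT/DEBUG kernels the memorystatus_abort_vm_map_fork() call
 * added below appears to unwind the in-progress vm_map_fork accounting,
 * then the task is resumed and KERN_FAILURE is returned.)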
 */
+#if DEVELOPMENT || DEBUG
+		memorystatus_abort_vm_map_fork(task);
+#endif
 		task_resume_internal(task);
 		return KERN_FAILURE;
 	}

-	/* Setup new task's vmmap, switch from parent task's map to its COW map */
-	oldmap = new_task->map;
-	new_task->map = vm_map_fork(new_task->ledger,
-				    task->map,
-				    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
-				     VM_MAP_FORK_PRESERVE_PURGEABLE));
-	vm_map_deallocate(oldmap);
-
-	if (is64bit) {
-		vm_map_set_64bit(get_task_map(new_task));
-	} else {
-		vm_map_set_32bit(get_task_map(new_task));
-	}
-
-	/* Get all the udata pointers from kqueue */
-	est_knotes = proc_list_uptrs(p, NULL, 0);
-	if (est_knotes > 0) {
-		buf_size = (est_knotes + 32) * sizeof(uint64_t);
-		buffer = (uint64_t *) kalloc(buf_size);
-		num_knotes = proc_list_uptrs(p, buffer, buf_size);
-		if (num_knotes > est_knotes + 32) {
-			num_knotes = est_knotes + 32;
+	/* Check with VM if vm_map_fork is allowed for this task */
+	if (memorystatus_allowed_vm_map_fork(task)) {
+
+		/* Setup new task's vmmap, switch from parent task's map to its COW map */
+		oldmap = new_task->map;
+		new_task->map = vm_map_fork(new_task->ledger,
+					    task->map,
+					    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
+					     VM_MAP_FORK_PRESERVE_PURGEABLE |
+					     VM_MAP_FORK_CORPSE_FOOTPRINT));
+		vm_map_deallocate(oldmap);
+
+		/* copy ledgers that impact the memory footprint */
+		vm_map_copy_footprint_ledgers(task, new_task);
+
+		/* Get all the udata pointers from kqueue */
+		est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
+		if (est_knotes > 0) {
+			buf_size = (est_knotes + 32) * sizeof(uint64_t);
+			buffer = (uint64_t *) kalloc(buf_size);
+			num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
+			if (num_knotes > est_knotes + 32) {
+				num_knotes = est_knotes + 32;
+			}
 		}
 	}

@@ -1941,6 +2186,9 @@
 		/* Equivalent of current thread in corpse */
 		if (thread_array[i] == self) {
 			thread_return = new_thread;
+			new_task->crashed_thread_id = thread_tid(new_thread);
+		} else if (first_thread == NULL) {
+			first_thread = new_thread;
 		} else {
 			/* drop the extra ref returned by thread_create_with_continuation */
 			thread_deallocate(new_thread);
@@ -1956,9 +2204,19 @@
 		/* Copy thread name */
 		bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
+		new_thread->thread_tag = thread_array[i]->thread_tag;
 		thread_copy_resource_info(new_thread, thread_array[i]);
 	}

+	/* return the first thread if we couldn't find the equivalent of current */
+	if (thread_return == THREAD_NULL) {
+		thread_return = first_thread;
+	}
+	else if (first_thread != THREAD_NULL) {
+		/* drop the extra ref returned by thread_create_with_continuation */
+		thread_deallocate(first_thread);
+	}
+
 	task_resume_internal(task);

 	for (i = 0; i < array_count; i++) {
@@ -2025,6 +2283,10 @@ task_terminate_internal(
 	}
 	task->task_could_use_secluded_mem = FALSE;
 	task->task_could_also_use_secluded_mem = FALSE;
+
+	if (task->task_suppressed_secluded) {
+		stop_secluded_suppression(task);
+	}
 #endif /* CONFIG_SECLUDED_MEMORY */

 	if (!task->active) {
@@ -2106,6 +2368,13 @@
 	// PR-17045188: Revisit implementation
 	//	task_partial_reap(task, pid);

+#if CONFIG_EMBEDDED
+	/*
+	 * remove all task watchers
+	 */
+	task_removewatchers(task);
+
+#endif /* CONFIG_EMBEDDED */

 	/*
 	 * Destroy all synchronizers owned by the task.
@@ -2146,16 +2415,6 @@ task_terminate_internal( vm_map_disable_hole_optimization(task->map); vm_map_unlock(task->map); - vm_map_remove(task->map, - task->map->min_offset, - task->map->max_offset, - /* no unnesting on final cleanup: */ - VM_MAP_REMOVE_NO_UNNESTING); - - /* release our shared region */ - vm_shared_region_set(task, NULL); - - #if MACH_ASSERT /* * Identify the pmap's process, in case the pmap ledgers drift @@ -2172,6 +2431,23 @@ task_terminate_internal( pmap_set_process(task->map->pmap, pid, procname); #endif /* MACH_ASSERT */ + vm_map_remove(task->map, + task->map->min_offset, + task->map->max_offset, + /* + * Final cleanup: + * + no unnesting + * + remove immutable mappings + * + allow gaps in range + */ + (VM_MAP_REMOVE_NO_UNNESTING | + VM_MAP_REMOVE_IMMUTABLE | + VM_MAP_REMOVE_GAPS_OK)); + + /* release our shared region */ + vm_shared_region_set(task, NULL); + + lck_mtx_lock(&tasks_threads_lock); queue_remove(&tasks, task, task_t, tasks); queue_enter(&terminated_tasks, task, task_t, tasks); @@ -2185,11 +2461,11 @@ task_terminate_internal( */ thread_interrupt_level(interrupt_save); -#if KPERF +#if KPC /* force the task to release all ctrs */ - if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS) + if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) kpc_force_all_ctrs(task, 0); -#endif +#endif /* KPC */ #if CONFIG_COALITIONS /* @@ -2351,8 +2627,15 @@ task_complete_halt(task_t task) */ vm_map_remove(task->map, task->map->min_offset, task->map->max_offset, - /* no unnesting on final cleanup: */ - VM_MAP_REMOVE_NO_UNNESTING); + /* + * Final cleanup: + * + no unnesting + * + remove immutable mappings + * + allow gaps in the range + */ + (VM_MAP_REMOVE_NO_UNNESTING | + VM_MAP_REMOVE_IMMUTABLE | + VM_MAP_REMOVE_GAPS_OK)); /* * Kick out any IOKitUser handles to the task. At best they're stale, @@ -2368,7 +2651,7 @@ task_complete_halt(task_t task) * This is a recursive-style suspension of the task, a count of * suspends is maintained. * - * CONDITIONS: the task is locked and active. + * CONDITIONS: the task is locked and active. */ void task_hold_locked( @@ -2381,6 +2664,10 @@ task_hold_locked( if (task->suspend_count++ > 0) return; + if (task->bsd_info) { + workq_proc_suspended(task->bsd_info); + } + /* * Iterate through all the threads and hold them. 
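 *
 * (Editor's note: only the first suspension reaches this loop, thanks
 * to the suspend_count++ guard above; later suspends merely bump the
 * count. task_release_locked() in the next hunk mirrors this by
 * releasing threads, and now calling workq_proc_resumed(), only once
 * the count drops back to zero.)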
*/ @@ -2495,6 +2782,10 @@ task_release_locked( if (--task->suspend_count > 0) return; + if (task->bsd_info) { + workq_proc_resumed(task->bsd_info); + } + queue_iterate(&task->threads, thread, thread_t, task_threads) { thread_mtx_lock(thread); thread_release(thread); @@ -3209,7 +3500,7 @@ task_disconnect_page_mappings(task_t task) * Conditions: * The caller holds a reference to the task */ -extern void vm_wake_compactor_swapper(); +extern void vm_wake_compactor_swapper(void); extern queue_head_t c_swapout_list_head; kern_return_t @@ -3220,8 +3511,9 @@ task_freeze( uint32_t *clean_count, uint32_t *dirty_count, uint32_t dirty_budget, - boolean_t *shared, - boolean_t walk_only) + uint32_t *shared_count, + int *freezer_error_code, + boolean_t eval_only) { kern_return_t kr = KERN_SUCCESS; @@ -3246,22 +3538,29 @@ task_freeze( task_unlock(task); - if (walk_only) { - panic("task_freeze - walk_only == TRUE"); - } else { - kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared); - } + kr = vm_map_freeze(task->map, + purgeable_count, + wired_count, + clean_count, + dirty_count, + dirty_budget, + shared_count, + freezer_error_code, + eval_only); task_lock(task); - if (walk_only == FALSE && kr == KERN_SUCCESS) + if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) { task->frozen = TRUE; + } + task->changing_freeze_state = FALSE; thread_wakeup(&task->changing_freeze_state); task_unlock(task); - if (VM_CONFIG_COMPRESSOR_IS_PRESENT) { + if (VM_CONFIG_COMPRESSOR_IS_PRESENT && + (eval_only == FALSE)) { vm_wake_compactor_swapper(); /* * We do an explicit wakeup of the swapout thread here @@ -3349,22 +3648,13 @@ host_security_set_task_token( kern_return_t task_send_trace_memory( - task_t target_task, + __unused task_t target_task, __unused uint32_t pid, __unused uint64_t uniqueid) { - kern_return_t kr = KERN_INVALID_ARGUMENT; - if (target_task == TASK_NULL) - return (KERN_INVALID_ARGUMENT); - -#if CONFIG_ATM - kr = atm_send_proc_inspect_notification(target_task, - pid, - uniqueid); - -#endif - return (kr); + return KERN_INVALID_ARGUMENT; } + /* * This routine was added, pretty much exclusively, for registering the * RPC glue vector for in-kernel short circuited tasks. Rather than @@ -3433,6 +3723,9 @@ task_info( case TASK_BASIC_INFO_32: case TASK_BASIC2_INFO_32: +#if defined(__arm__) || defined(__arm64__) + case TASK_BASIC_INFO_64: +#endif { task_basic_info_32_t basic_info; vm_map_t map; @@ -3477,19 +3770,20 @@ task_info( break; } - case TASK_BASIC_INFO_64: +#if defined(__arm__) || defined(__arm64__) + case TASK_BASIC_INFO_64_2: { - task_basic_info_64_t basic_info; + task_basic_info_64_2_t basic_info; vm_map_t map; clock_sec_t secs; clock_usec_t usecs; - if (*task_info_count < TASK_BASIC_INFO_64_COUNT) { + if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) { error = KERN_INVALID_ARGUMENT; break; } - basic_info = (task_basic_info_64_t)task_info_out; + basic_info = (task_basic_info_64_2_t)task_info_out; map = (task == kernel_task)? 
kernel_map: task->map; basic_info->virtual_size = map->size; @@ -3511,18 +3805,58 @@ task_info( (typeof(basic_info->system_time.seconds))secs; basic_info->system_time.microseconds = usecs; - *task_info_count = TASK_BASIC_INFO_64_COUNT; + *task_info_count = TASK_BASIC_INFO_64_2_COUNT; break; } - case MACH_TASK_BASIC_INFO: +#else /* defined(__arm__) || defined(__arm64__) */ + case TASK_BASIC_INFO_64: { - mach_task_basic_info_t basic_info; - vm_map_t map; - clock_sec_t secs; - clock_usec_t usecs; + task_basic_info_64_t basic_info; + vm_map_t map; + clock_sec_t secs; + clock_usec_t usecs; - if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) { + if (*task_info_count < TASK_BASIC_INFO_64_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } + + basic_info = (task_basic_info_64_t)task_info_out; + + map = (task == kernel_task)? kernel_map: task->map; + basic_info->virtual_size = map->size; + basic_info->resident_size = + (mach_vm_size_t)(pmap_resident_count(map->pmap)) + * PAGE_SIZE_64; + + basic_info->policy = ((task != kernel_task)? + POLICY_TIMESHARE: POLICY_RR); + basic_info->suspend_count = task->user_stop_count; + + absolutetime_to_microtime(task->total_user_time, &secs, &usecs); + basic_info->user_time.seconds = + (typeof(basic_info->user_time.seconds))secs; + basic_info->user_time.microseconds = usecs; + + absolutetime_to_microtime(task->total_system_time, &secs, &usecs); + basic_info->system_time.seconds = + (typeof(basic_info->system_time.seconds))secs; + basic_info->system_time.microseconds = usecs; + + *task_info_count = TASK_BASIC_INFO_64_COUNT; + break; + } +#endif /* defined(__arm__) || defined(__arm64__) */ + + case MACH_TASK_BASIC_INFO: + { + mach_task_basic_info_t basic_info; + vm_map_t map; + clock_sec_t secs; + clock_usec_t usecs; + + if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) { error = KERN_INVALID_ARGUMENT; break; } @@ -3583,7 +3917,7 @@ task_info( if (thread->options & TH_OPT_IDLE_THREAD) continue; - thread_read_times(thread, &user_time, &system_time); + thread_read_times(thread, &user_time, &system_time, NULL); time_value_add(×_info->user_time, &user_time); time_value_add(×_info->system_time, &system_time); @@ -3667,7 +4001,7 @@ task_info( /* only set format on output for those expecting it */ if (*task_info_count >= TASK_DYLD_INFO_COUNT) { - info->all_image_info_format = task_has_64BitAddr(task) ? + info->all_image_info_format = task_has_64Bit_addr(task) ? 
TASK_DYLD_ALL_IMAGE_INFO_64 : TASK_DYLD_ALL_IMAGE_INFO_32 ; *task_info_count = TASK_DYLD_INFO_COUNT; @@ -3927,14 +4261,12 @@ task_info( case TASK_POWER_INFO_V2: { - if (*task_info_count < TASK_POWER_INFO_V2_COUNT) { + if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) { error = KERN_INVALID_ARGUMENT; break; } task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out; - - uint64_t *task_energy = NULL; - task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, task_energy); + task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2); break; } @@ -4137,7 +4469,7 @@ task_info( flags_info = (task_flags_info_t)task_info_out; /* only publish the 64-bit flag of the task */ - flags_info->flags = task->t_flags & TF_64B_ADDR; + flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA); *task_info_count = TASK_FLAGS_INFO_COUNT; break; @@ -4161,6 +4493,8 @@ task_info( if (task->itk_space){ dbg_info->ipc_space_size = task->itk_space->is_table_size; } + + dbg_info->suspend_count = task->suspend_count; error = KERN_SUCCESS; *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT; @@ -4178,6 +4512,41 @@ task_info( return (error); } +/* + * task_info_from_user + * + * When calling task_info from user space, + * this function will be executed as mig server side + * instead of calling directly into task_info. + * This gives the possibility to perform more security + * checks on task_port. + * + * In the case of TASK_DYLD_INFO, we require the more + * privileged task_port not the less-privileged task_name_port. + * + */ +kern_return_t +task_info_from_user( + mach_port_t task_port, + task_flavor_t flavor, + task_info_t task_info_out, + mach_msg_type_number_t *task_info_count) +{ + task_t task; + kern_return_t ret; + + if (flavor == TASK_DYLD_INFO) + task = convert_port_to_task(task_port); + else + task = convert_port_to_task_name(task_port); + + ret = task_info(task, flavor, task_info_out, task_info_count); + + task_deallocate(task); + + return ret; +} + /* * task_power_info * @@ -4189,7 +4558,7 @@ task_power_info_locked( task_t task, task_power_info_t info, gpu_energy_data_t ginfo, - uint64_t *task_energy) + task_power_info_v2_t infov2) { thread_t thread; ledger_amount_t tmp; @@ -4207,14 +4576,21 @@ task_power_info_locked( info->total_user = task->total_user_time; info->total_system = task->total_system_time; - if (task_energy) { - *task_energy = task->task_energy; +#if CONFIG_EMBEDDED + if (infov2) { + infov2->task_energy = task->task_energy; } +#endif if (ginfo) { ginfo->task_gpu_utilisation = task->task_gpu_ns; } + if (infov2) { + infov2->task_ptime = task->total_ptime; + infov2->task_pset_switches = task->ps_switch; + } + queue_iterate(&task->threads, thread, thread_t, task_threads) { uint64_t tval; spl_t x; @@ -4228,13 +4604,21 @@ task_power_info_locked( info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1; info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2; - if (task_energy) { - *task_energy += ml_energy_stat(thread); +#if CONFIG_EMBEDDED + if (infov2) { + infov2->task_energy += ml_energy_stat(thread); } +#endif tval = timer_grab(&thread->user_timer); info->total_user += tval; + if (infov2) { + tval = timer_grab(&thread->ptime); + infov2->task_ptime += tval; + infov2->task_pset_switches += thread->ps_switch; + } + tval = timer_grab(&thread->system_timer); if (thread->precise_user_kernel_time) { info->total_system += tval; @@ -4262,6 +4646,7 @@ task_gpu_utilisation( task_t task) { uint64_t gpu_time = 0; +#if !CONFIG_EMBEDDED 
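+	/*
+	 * (Editor's note: on !CONFIG_EMBEDDED configurations the loop below
+	 * accumulates per-thread GPU time into gpu_time; the new
+	 * CONFIG_EMBEDDED branch further down skips the loop and returns the
+	 * zero-initialized value.)
+	 */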
thread_t thread; task_lock(task); @@ -4277,6 +4662,10 @@ task_gpu_utilisation( } task_unlock(task); +#else /* CONFIG_EMBEDDED */ + /* silence compiler warning */ + (void)task; +#endif /* !CONFIG_EMBEDDED */ return gpu_time; } @@ -4309,6 +4698,61 @@ task_energy( return energy; } + +uint64_t +task_cpu_ptime( + __unused task_t task) +{ + return 0; +} + + +/* This function updates the cpu time in the arrays for each + * effective and requested QoS class + */ +void +task_update_cpu_time_qos_stats( + task_t task, + uint64_t *eqos_stats, + uint64_t *rqos_stats) +{ + if (!eqos_stats && !rqos_stats) { + return; + } + + task_lock(task); + thread_t thread; + queue_iterate(&task->threads, thread, thread_t, task_threads) { + if (thread->options & TH_OPT_IDLE_THREAD) { + continue; + } + + thread_update_qos_cpu_time(thread); + } + + if (eqos_stats) { + eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default; + eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance; + eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background; + eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility; + eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy; + eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; + eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; + } + + if (rqos_stats) { + rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default; + rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance; + rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background; + rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility; + rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy; + rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; + rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; + } + + task_unlock(task); +} + kern_return_t task_purgable_info( task_t task, @@ -4624,7 +5068,129 @@ task_get_state( return ret; } + +static kern_return_t __attribute__((noinline,not_tail_called)) +PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND( + mach_exception_code_t code, + mach_exception_subcode_t subcode, + void *reason) +{ +#ifdef MACH_BSD + if (1 == proc_selfpid()) + return KERN_NOT_SUPPORTED; // initproc is immune +#endif + mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = { + [0] = code, + [1] = subcode, + }; + task_t task = current_task(); + kern_return_t kr; + + /* (See jetsam-related comments below) */ + + proc_memstat_terminated(task->bsd_info, TRUE); + kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason); + proc_memstat_terminated(task->bsd_info, FALSE); + return kr; +} + +kern_return_t +task_violated_guard( + mach_exception_code_t code, + mach_exception_subcode_t subcode, + void *reason) +{ + return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason); +} + + #if CONFIG_MEMORYSTATUS + +boolean_t +task_get_memlimit_is_active(task_t task) +{ + assert (task != NULL); + + if (task->memlimit_is_active == 1) { + return(TRUE); + } else { + return (FALSE); + } +} + +void +task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active) +{ + assert (task != NULL); + + if (memlimit_is_active) { + task->memlimit_is_active = 1; + } else 
{ + task->memlimit_is_active = 0; + } +} + +boolean_t +task_get_memlimit_is_fatal(task_t task) +{ + assert(task != NULL); + + if (task->memlimit_is_fatal == 1) { + return(TRUE); + } else { + return(FALSE); + } +} + +void +task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal) +{ + assert (task != NULL); + + if (memlimit_is_fatal) { + task->memlimit_is_fatal = 1; + } else { + task->memlimit_is_fatal = 0; + } +} + +boolean_t +task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active) +{ + boolean_t triggered = FALSE; + + assert(task == current_task()); + + /* + * Returns true, if task has already triggered an exc_resource exception. + */ + + if (memlimit_is_active) { + triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE); + } else { + triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE); + } + + return(triggered); +} + +void +task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active) +{ + assert(task == current_task()); + + /* + * We allow one exc_resource per process per active/inactive limit. + * The limit's fatal attribute does not come into play. + */ + + if (memlimit_is_active) { + task->memlimit_active_exc_resource = 1; + } else { + task->memlimit_inactive_exc_resource = 1; + } +} + #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation void __attribute__((noinline)) @@ -4634,6 +5200,7 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, int pid = 0; const char *procname = "unknown"; mach_exception_data_type_t code[EXCEPTION_CODE_MAX]; + boolean_t send_sync_exc_resource = FALSE; #ifdef MACH_BSD pid = proc_selfpid(); @@ -4646,8 +5213,10 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, return; } - if (task->bsd_info != NULL) + if (task->bsd_info != NULL) { procname = proc_name_address(current_task()->bsd_info); + send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info); + } #endif #if CONFIG_COREDUMP if (hwm_user_cores) { @@ -4691,18 +5260,18 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, */ proc_memstat_terminated(current_task()->bsd_info, TRUE); - printf("process %s[%d] crossed memory high watermark (%d MB); sending " - "EXC_RESOURCE.\n", procname, pid, max_footprint_mb); - code[0] = code[1] = 0; EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY); EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK); EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb); - /* Do not generate a corpse fork if the violation is a fatal one */ - if (is_fatal || exc_via_corpse_forking == 0) { - /* Do not send a EXC_RESOURCE is corpse_for_fatal_memkill is set */ - if (corpse_for_fatal_memkill == 0) { + /* + * Do not generate a corpse fork if the violation is a fatal one + * or the process wants synchronous EXC_RESOURCE exceptions. + */ + if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) { + /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */ + if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) { /* * Use the _internal_ variant so that no user-space * process can resume our task from under us. 
@@ -4712,7 +5281,13 @@ PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb,
 			task_resume_internal(task);
 		}
 	} else {
-		task_enqueue_exception_with_corpse(task, code, EXCEPTION_CODE_MAX);
+		if (audio_active) {
+			printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
+				"suppressed due to audio playback.\n", procname, pid, max_footprint_mb);
+		} else {
+			task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
+				code, EXCEPTION_CODE_MAX, NULL);
+		}
 	}
 
 	/*
@@ -4731,38 +5306,50 @@ task_footprint_exceeded(int warning, __unused const void *param0, __unused const
 {
 	ledger_amount_t max_footprint, max_footprint_mb;
 	task_t task;
-	boolean_t is_fatal;
-	boolean_t trigger_exception;
+	boolean_t is_warning;
+	boolean_t memlimit_is_active;
+	boolean_t memlimit_is_fatal;
 
 	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
 		/*
 		 * Task memory limits only provide a warning on the way up.
 		 */
 		return;
-	}
+	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
+		/*
+		 * This task is in danger of violating a memory limit;
+		 * it has exceeded a percentage level of the limit.
+		 */
+		is_warning = TRUE;
+	} else {
+		/*
+		 * The task has exceeded the physical footprint limit.
+		 * This is not a warning but a true limit violation.
+		 */
+		is_warning = FALSE;
+	}
 
 	task = current_task();
 
 	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
 	max_footprint_mb = max_footprint >> 20;
 
-	/*
-	 * Capture the trigger exception flag before turning off the exception.
-	 */
-	trigger_exception = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION ? TRUE : FALSE;
-
-	is_fatal = memorystatus_turnoff_exception_and_get_fatalness((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE, (int)max_footprint_mb);
+	memlimit_is_active = task_get_memlimit_is_active(task);
+	memlimit_is_fatal = task_get_memlimit_is_fatal(task);
 
 	/*
-	 * If this an actual violation (not a warning),
-	 * generate a non-fatal high watermark EXC_RESOURCE.
+	 * If this is an actual violation (not a warning), then generate an EXC_RESOURCE exception.
+	 * We only generate the exception once per process per memlimit (active/inactive limit).
+	 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
+	 * and we disable it by marking that memlimit as exception-triggered.
 	 */
-	if ((warning == 0) && trigger_exception) {
-		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, is_fatal);
+	if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
+		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
+		memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
+		task_mark_has_triggered_exc_resource(task, memlimit_is_active);
 	}
 
-	memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
-		is_fatal);
+	memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
 }
 
 extern int proc_check_footprint_priv(void);
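task_footprint_exceeded() is a ledger callback. The sketch below shows how such a callback is typically wired to the phys_footprint entry when the task ledger template is built; the registration site and the exact ledger_set_callback() signature are assumed from <kern/ledger.h> and are not part of this diff (t stands for the task ledger template).

/* Assumed wiring at ledger-template initialization (sketch). */
ledger_set_callback(t, task_ledgers.phys_footprint,
    task_footprint_exceeded, NULL, NULL);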
@@ -4775,11 +5362,21 @@ task_set_phys_footprint_limit(
 {
 	kern_return_t error;
 
+	boolean_t memlimit_is_active;
+	boolean_t memlimit_is_fatal;
+
 	if ((error = proc_check_footprint_priv())) {
 		return (KERN_NO_ACCESS);
 	}
 
-	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
+	/*
+	 * This call should probably be obsoleted.
+	 * But for now, we default to the limit's current state.
+	 */
+	memlimit_is_active = task_get_memlimit_is_active(task);
+	memlimit_is_fatal = task_get_memlimit_is_fatal(task);
+
+	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
 }
 
 kern_return_t
@@ -4809,19 +5406,21 @@ task_set_phys_footprint_limit_internal(
 	task_t task,
 	int new_limit_mb,
 	int *old_limit_mb,
-	boolean_t trigger_exception)
+	boolean_t memlimit_is_active,
+	boolean_t memlimit_is_fatal)
 {
 	ledger_amount_t	old;
 
 	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
+
+	/*
+	 * Check that limit >> 20 will not give an "unexpected" 32-bit
+	 * result. There are, however, implicit assumptions that a -1 MB limit
+	 * equates to LEDGER_LIMIT_INFINITY.
+	 */
+	assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
 
 	if (old_limit_mb) {
-		/*
-		 * Check that limit >> 20 will not give an "unexpected" 32-bit
-		 * result. There are, however, implicit assumptions that -1 mb limit
-		 * equates to LEDGER_LIMIT_INFINITY.
-		 */
-		assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
 		*old_limit_mb = (int)(old >> 20);
 	}
 
@@ -4832,6 +5431,12 @@ task_set_phys_footprint_limit_internal(
 		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
 		                 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
 		                 max_task_footprint ? max_task_footprint_warning_level : 0);
+
+		task_lock(task);
+		task_set_memlimit_is_active(task, memlimit_is_active);
+		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
+		task_unlock(task);
+
 		return (KERN_SUCCESS);
 	}
 
@@ -4841,17 +5446,25 @@ task_set_phys_footprint_limit_internal(
 
 	task_lock(task);
 
-	if (trigger_exception) {
-		task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
-	} else {
-		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
+	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
+	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
+	    (((ledger_amount_t)new_limit_mb << 20) == old)) {
+		/*
+		 * memlimit state is not changing
+		 */
+		task_unlock(task);
+		return(KERN_SUCCESS);
 	}
 
+	task_set_memlimit_is_active(task, memlimit_is_active);
+	task_set_memlimit_is_fatal(task, memlimit_is_fatal);
+
 	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
 		(ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
 
 	if (task == current_task()) {
-		ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
+		ledger_check_new_balance(current_thread(), task->ledger,
+		    task_ledgers.phys_footprint);
 	}
 
 	task_unlock(task);
@@ -4896,6 +5509,17 @@ task_get_phys_footprint_limit(
 }
 #endif /* CONFIG_MEMORYSTATUS */
 
+void
+task_set_thread_limit(task_t task, uint16_t thread_limit)
+{
+	assert(task != kernel_task);
+	if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
+		task_lock(task);
+		task->task_thread_limit = thread_limit;
+		task_unlock(task);
+	}
+}
+
 /*
  * We need to export some functions to other components that
  * are currently implemented in macros within the osfmk
@@ -4982,6 +5606,18 @@ task_findtid(task_t task, uint64_t tid)
 	return (found_thread);
 }
 
+int pid_from_task(task_t task)
+{
+	int pid = -1;
+
+	if (task->bsd_info) {
+		pid = proc_pid(task->bsd_info);
+	} else {
+		pid = task_pid(task);
+	}
+
+	return pid;
+}
 
 /*
  * Control the CPU usage monitor for a task.
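A worked restatement of the unit conventions used by task_set_phys_footprint_limit_internal(): limits travel as whole megabytes, -1 MB means "no limit", and the ledger stores bytes. The helper name below is hypothetical.

/* Hypothetical helper: translate a memlimit in MB to ledger units. */
static ledger_amount_t
memlimit_mb_to_bytes(int limit_mb)
{
	if (limit_mb == -1) {
		return LEDGER_LIMIT_INFINITY;	/* "no limit" sentinel */
	}
	return (ledger_amount_t)limit_mb << 20;	/* MB -> bytes */
}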
@@ -5135,7 +5771,7 @@ SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
 		fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
 		trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
-		printf("process %s[%d] caught waking the CPU %llu times "
+		os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
 			"over ~%llu seconds, averaging %llu wakes / second and "
 			"violating a %slimit of %llu wakes over %llu seconds.\n",
 			procname, pid,
@@ -5158,12 +5794,12 @@ SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
 		return;
 	}
 	if (audio_active) {
-		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
+		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
 			"supressed due to audio playback\n", procname, pid);
 		return;
 	}
 	if (lei.lei_last_refill == 0) {
-		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
+		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
 			"supressed due to lei.lei_last_refill = 0 \n", procname, pid);
 	}
 
@@ -5321,7 +5957,7 @@ void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO
 	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
 		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
 	}
-	printf("process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
+	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
 		pid, flavor, (lei.lei_balance / (1024 * 1024)),
 		(lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
 
 	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
@@ -5367,15 +6003,18 @@ task_set_mach_voucher(
 
 kern_return_t
 task_swap_mach_voucher(
-	task_t		task,
-	ipc_voucher_t	new_voucher,
-	ipc_voucher_t	*in_out_old_voucher)
+	__unused task_t		task,
+	__unused ipc_voucher_t	new_voucher,
+	ipc_voucher_t		*in_out_old_voucher)
 {
-	if (TASK_NULL == task)
-		return KERN_INVALID_TASK;
-
-	*in_out_old_voucher = new_voucher;
-	return KERN_SUCCESS;
+	/*
+	 * Currently this function is only called from a MIG-generated
+	 * routine which doesn't release the reference on the voucher
+	 * addressed by in_out_old_voucher. To avoid leaking this reference,
+	 * a call to release it has been added here.
+	 */
+	ipc_voucher_release(*in_out_old_voucher);
+	return KERN_NOT_SUPPORTED;
 }
 
 void task_set_gpu_denied(task_t task, boolean_t denied)
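The reference-counting contract behind the voucher change above, as a caller-side sketch. This is illustrative only: voucher_port, new_voucher, and task are assumed to be in scope, and the +1 reference is assumed to come from convert_port_to_voucher() in the MIG stub.

ipc_voucher_t old_voucher = convert_port_to_voucher(voucher_port); /* +1 ref */
kern_return_t kr = task_swap_mach_voucher(task, new_voucher, &old_voucher);
/* kr == KERN_NOT_SUPPORTED and the +1 reference has been consumed by
 * task_swap_mach_voucher(), so the stub must not release it again. */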
@@ -5449,7 +6088,16 @@ kdebug_trace_dyld(task_t task, uint32_t base_code,
 	vm_map_offset_t map_data;
 	vm_offset_t data;
 
-	assert(infos_copy != NULL);
+	if (!infos_copy) {
+		return KERN_INVALID_ADDRESS;
+	}
+
+	if (!kdebug_enable ||
+	    !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0)))
+	{
+		vm_map_copy_discard(infos_copy);
+		return KERN_SUCCESS;
+	}
 
 	if (task == NULL || task != current_task()) {
 		return KERN_INVALID_TASK;
@@ -5525,6 +6173,57 @@ task_register_dyld_get_process_state(__unused task_t task,
 	return KERN_NOT_SUPPORTED;
 }
 
+kern_return_t
+task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
+		task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
+{
+#if MONOTONIC
+	task_t task = (task_t)task_insp;
+	kern_return_t kr = KERN_SUCCESS;
+	mach_msg_type_number_t size;
+
+	if (task == TASK_NULL) {
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	size = *size_in_out;
+
+	switch (flavor) {
+	case TASK_INSPECT_BASIC_COUNTS: {
+		struct task_inspect_basic_counts *bc;
+		uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
+
+		if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
+			kr = KERN_INVALID_ARGUMENT;
+			break;
+		}
+
+		mt_fixed_task_counts(task, task_counts);
+		bc = (struct task_inspect_basic_counts *)info_out;
+#ifdef MT_CORE_INSTRS
+		bc->instructions = task_counts[MT_CORE_INSTRS];
+#else /* defined(MT_CORE_INSTRS) */
+		bc->instructions = 0;
+#endif /* !defined(MT_CORE_INSTRS) */
+		bc->cycles = task_counts[MT_CORE_CYCLES];
+		size = TASK_INSPECT_BASIC_COUNTS_COUNT;
+		break;
+	}
+	default:
+		kr = KERN_INVALID_ARGUMENT;
+		break;
+	}
+
+	if (kr == KERN_SUCCESS) {
+		*size_in_out = size;
+	}
+	return kr;
+#else /* MONOTONIC */
+#pragma unused(task_insp, flavor, info_out, size_in_out)
+	return KERN_NOT_SUPPORTED;
+#endif /* !MONOTONIC */
+}
+
 #if CONFIG_SECLUDED_MEMORY
 int num_tasks_can_use_secluded_mem = 0;
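An illustrative kernel-side call on a MONOTONIC build (sketch only; task is assumed to be in scope, and further error handling is elided):

struct task_inspect_basic_counts counts = { 0 };
mach_msg_type_number_t size = TASK_INSPECT_BASIC_COUNTS_COUNT;

if (task_inspect((task_inspect_t)task, TASK_INSPECT_BASIC_COUNTS,
    (task_inspect_info_t)&counts, &size) == KERN_SUCCESS) {
	/* counts.instructions is 0 when MT_CORE_INSTRS is unavailable. */
	printf("cycles=%llu instructions=%llu\n",
	    counts.cycles, counts.instructions);
}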
@@ -5581,7 +6280,8 @@ task_set_could_also_use_secluded_mem(
 
 boolean_t
 task_can_use_secluded_mem(
-	task_t	task)
+	task_t		task,
+	boolean_t	is_alloc)
 {
 	if (task->task_can_use_secluded_mem) {
 		assert(task->task_could_use_secluded_mem);
@@ -5593,6 +6293,20 @@ task_can_use_secluded_mem(
 		assert(num_tasks_can_use_secluded_mem > 0);
 		return TRUE;
 	}
+
+	/*
+	 * If a single task is using more than some amount of
+	 * memory, allow it to dip into secluded and also begin
+	 * suppression of secluded memory until the task exits.
+	 */
+	if (is_alloc && secluded_shutoff_trigger != 0) {
+		uint64_t phys_used = get_task_phys_footprint(task);
+		if (phys_used > secluded_shutoff_trigger) {
+			start_secluded_suppression(task);
+			return TRUE;
+		}
+	}
+
 	return FALSE;
 }
 
@@ -5609,3 +6323,76 @@ task_io_user_clients(task_t task)
 {
 	return (&task->io_user_clients);
 }
+
+void
+task_copy_fields_for_exec(task_t dst_task, task_t src_task)
+{
+	dst_task->vtimers = src_task->vtimers;
+}
+
+#if DEVELOPMENT || DEBUG
+int vm_region_footprint = 0;
+#endif /* DEVELOPMENT || DEBUG */
+
+boolean_t
+task_self_region_footprint(void)
+{
+#if DEVELOPMENT || DEBUG
+	if (vm_region_footprint) {
+		/* system-wide override */
+		return TRUE;
+	}
+#endif /* DEVELOPMENT || DEBUG */
+	return current_task()->task_region_footprint;
+}
+
+void
+task_self_region_footprint_set(
+	boolean_t newval)
+{
+	task_t curtask;
+
+	curtask = current_task();
+	task_lock(curtask);
+	if (newval) {
+		curtask->task_region_footprint = TRUE;
+	} else {
+		curtask->task_region_footprint = FALSE;
+	}
+	task_unlock(curtask);
+}
+
+void
+task_set_darkwake_mode(task_t task, boolean_t set_mode)
+{
+	assert(task);
+
+	task_lock(task);
+
+	if (set_mode) {
+		task->t_flags |= TF_DARKWAKE_MODE;
+	} else {
+		task->t_flags &= ~(TF_DARKWAKE_MODE);
+	}
+
+	task_unlock(task);
+}
+
+boolean_t
+task_get_darkwake_mode(task_t task)
+{
+	assert(task);
+	return ((task->t_flags & TF_DARKWAKE_MODE) != 0);
+}
+
+#if __arm64__
+void
+task_set_legacy_footprint(
+	task_t task,
+	boolean_t new_val)
+{
+	task_lock(task);
+	task->task_legacy_footprint = new_val;
+	task_unlock(task);
+}
+#endif /* __arm64__ */
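Since task_self_region_footprint_set() toggles persistent per-task state, a caller that only needs footprint-style accounting for a single query would typically save and restore the flag, as in this sketch (the surrounding vm_region()-style query is elided):

boolean_t saved = task_self_region_footprint();

task_self_region_footprint_set(TRUE);	/* opt into footprint accounting */
/* ... perform the footprint-sensitive region query here ... */
task_self_region_footprint_set(saved);	/* restore previous behavior */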