X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/15129b1c8dbb3650c63b70adb1cad9af601c6c17..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/osfmk/kern/task.c diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index aa5d2b360..999dfefcf 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -86,9 +86,6 @@ * Copyright (c) 2005 SPARTA, Inc. */ -#include -#include - #include #include #include @@ -98,6 +95,7 @@ #include #include +#include #include #include #include @@ -108,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -140,14 +139,9 @@ #include #include #include -#include #include -#if CONFIG_MACF_MACH -#include -#endif - #if CONFIG_COUNTERS #include #endif /* CONFIG_COUNTERS */ @@ -155,6 +149,18 @@ #include #include +#if CONFIG_ATM +#include +#endif + +#include + +#if KPERF +extern int kpc_force_all_ctrs(task_t, int); +#endif + +uint32_t qos_override_mode; + task_t kernel_task; zone_t task_zone; lck_attr_t task_lck_attr; @@ -171,8 +177,16 @@ zinfo_usage_store_t tasks_tkm_shared; expired_task_statistics_t dead_task_statistics; lck_spin_t dead_task_statistics_lock; -static ledger_template_t task_ledger_template = NULL; -struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +ledger_template_t task_ledger_template = NULL; + +struct _task_ledger_indices task_ledgers __attribute__((used)) = + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + { 0 /* initialized at runtime */}, +#ifdef CONFIG_BANK + -1, -1, +#endif + }; + void init_task_ledgers(void); void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1); void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1); @@ -207,6 +221,10 @@ int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. 
*/ int max_task_footprint = 0; /* Per-task limit on physical memory consumption */ +#if MACH_ASSERT +int pmap_ledgers_panic = 1; +#endif /* MACH_ASSERT */ + int task_max = CONFIG_TASK_MAX; /* Max number of tasks */ int hwm_user_cores = 0; /* high watermark violations generate user core files */ @@ -220,6 +238,9 @@ extern char *proc_name_address(struct proc *p); extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb); #endif #endif +#if MACH_ASSERT +extern int pmap_ledgers_panic; +#endif /* MACH_ASSERT */ /* Forwards */ @@ -254,9 +275,9 @@ task_set_64bit( task_t task, boolean_t is64bit) { -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) thread_t thread; -#endif /* defined(__i386__) || defined(__x86_64__) */ +#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */ task_lock(task); @@ -276,13 +297,13 @@ task_set_64bit( * state with respect to its task's 64-bitness. */ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) queue_iterate(&task->threads, thread, thread_t, task_threads) { thread_mtx_lock(thread); machine_thread_switch_addrmode(thread); thread_mtx_unlock(thread); } -#endif /* defined(__i386__) || defined(__x86_64__) */ +#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */ out: task_unlock(task); @@ -298,6 +319,18 @@ task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size) task_unlock(task); } +void +task_atm_reset(__unused task_t task) { + +#if CONFIG_ATM + if (task->atm_context != NULL) { + atm_task_descriptor_destroy(task->atm_context); + task->atm_context = NULL; + } +#endif + +} + #if TASK_REFERENCE_LEAK_DEBUG #include @@ -365,20 +398,21 @@ task_init(void) zone_change(task_zone, Z_NOENCRYPT, TRUE); /* - * Configure per-task memory limit. The boot arg takes precedence over the - * device tree. + * Configure per-task memory limit. 
+ * The boot-arg is interpreted as Megabytes, + * and takes precedence over the device tree. + * Setting the boot-arg to 0 disables task limits. */ if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint, sizeof (max_task_footprint))) { - max_task_footprint = 0; - } - - if (max_task_footprint == 0) { /* * No limit was found in boot-args, so go look in the device tree. */ if (!PE_get_default("kern.max_task_pmem", &max_task_footprint, sizeof(max_task_footprint))) { + /* + * No limit was found in device tree. + */ max_task_footprint = 0; } } @@ -398,11 +432,22 @@ task_init(void) #endif } +#if MACH_ASSERT + PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic, + sizeof (pmap_ledgers_panic)); +#endif /* MACH_ASSERT */ + if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores, sizeof (hwm_user_cores))) { hwm_user_cores = 0; } + if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) { + printf("QOS override mode: 0x%08x\n", qos_override_mode); + } else { + qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE; + } + proc_init_cpumon_params(); if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) { @@ -423,7 +468,16 @@ task_init(void) disable_exc_resource = 0; } +/* + * If we have coalitions, coalition_init() will call init_task_ledgers() as it + * sets up the ledgers for the default coalition. If we don't have coalitions, + * then we have to call it now. + */ +#if CONFIG_COALITIONS + assert(task_ledger_template); +#else /* CONFIG_COALITIONS */ init_task_ledgers(); +#endif /* CONFIG_COALITIONS */ #if TASK_REFERENCE_LEAK_DEBUG simple_lock_init(&task_ref_lock, 0); @@ -439,15 +493,16 @@ task_init(void) * Create the kernel task as the first task. 
*/ #ifdef __LP64__ - if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS) #else - if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS) #endif panic("task_init\n"); vm_map_deallocate(kernel_task->map); kernel_task->map = kernel_map; lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr); + } /* @@ -512,18 +567,28 @@ host_security_create_task_token( * * phys_footprint * Physical footprint: This is the sum of: - * + phys_mem [task's resident memory] - * + phys_compressed - * + iokit_mem + * + internal + * + internal_compressed + * + iokit_mapped + * - alternate_accounting * - * iokit_mem - * IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state]. - * - * phys_compressed - * Physical compressed: Amount of this task's resident memory which is held by the compressor. + * internal + * The task's anonymous memory, which on iOS is always resident. + * + * internal_compressed + * Amount of this task's internal memory which is held by the compressor. * Such memory is no longer actually resident for the task [i.e., resident in its pmap], * and could be either decompressed back into memory, or paged out to storage, depending * on our implementation. + * + * iokit_mapped + * IOKit mappings: The total size of all IOKit mappings in this task, regardless of + * clean/dirty or internal/external state. + * + * alternate_accounting + * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages + * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid + * double counting. 
*/ void init_task_ledgers(void) @@ -545,26 +610,94 @@ init_task_ledgers(void) "bytes"); task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem", "bytes"); - task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings", + task_ledgers.internal = ledger_entry_add(t, "internal", "physmem", + "bytes"); + task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings", + "bytes"); + task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem", "bytes"); task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem", "bytes"); - task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem", + task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem", "bytes"); + task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes"); + task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes"); + task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes"); + task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes"); task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power", "count"); task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power", "count"); + + sfi_class_id_t class_id, ledger_alias; + for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) { + task_ledgers.sfi_wait_times[class_id] = -1; + } + + /* don't account for UNSPECIFIED */ + for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) { + ledger_alias = sfi_get_ledger_alias_for_class(class_id); + if (ledger_alias != SFI_CLASS_UNSPECIFIED) { + /* Check to see if alias has been registered yet */ + if (task_ledgers.sfi_wait_times[ledger_alias] != -1) { + task_ledgers.sfi_wait_times[class_id] = 
task_ledgers.sfi_wait_times[ledger_alias]; + } else { + /* Otherwise, initialize it first */ + task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias); + } + } else { + task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id); + } - if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) || - (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) || - (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) || - (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) || - (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) { + if (task_ledgers.sfi_wait_times[class_id] < 0) { + panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id); + } + } + +#ifdef CONFIG_BANK + task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns"); + task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns"); +#endif + + assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1); + + if ((task_ledgers.cpu_time < 0) || + (task_ledgers.tkm_private < 0) || + (task_ledgers.tkm_shared < 0) || + (task_ledgers.phys_mem < 0) || + (task_ledgers.wired_mem < 0) || + (task_ledgers.internal < 0) || + (task_ledgers.iokit_mapped < 0) || + (task_ledgers.alternate_accounting < 0) || + (task_ledgers.phys_footprint < 0) || + (task_ledgers.internal_compressed < 0) || + (task_ledgers.purgeable_volatile < 0) || + (task_ledgers.purgeable_nonvolatile < 0) || + (task_ledgers.purgeable_volatile_compressed < 0) || + (task_ledgers.purgeable_nonvolatile_compressed < 0) || + (task_ledgers.platform_idle_wakeups < 0) || + (task_ledgers.interrupt_wakeups < 0) +#ifdef CONFIG_BANK + || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) +#endif + ) { panic("couldn't create entries for task ledger template"); } 
ledger_track_maximum(t, task_ledgers.phys_footprint, 60); +#if MACH_ASSERT + if (pmap_ledgers_panic) { + ledger_panic_on_negative(t, task_ledgers.phys_footprint); + ledger_panic_on_negative(t, task_ledgers.internal); + ledger_panic_on_negative(t, task_ledgers.internal_compressed); + ledger_panic_on_negative(t, task_ledgers.iokit_mapped); + ledger_panic_on_negative(t, task_ledgers.alternate_accounting); + ledger_panic_on_negative(t, task_ledgers.purgeable_volatile); + ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile); + ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed); + ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed); + } +#endif /* MACH_ASSERT */ #if CONFIG_JETSAM ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL); @@ -579,6 +712,7 @@ init_task_ledgers(void) kern_return_t task_create_internal( task_t parent_task, + coalition_t parent_coalition __unused, boolean_t inherit_memory, boolean_t is_64bit, task_t *child_task) /* OUT */ @@ -605,6 +739,10 @@ task_create_internal( new_task->ledger = ledger; +#if defined(CONFIG_SCHED_MULTIQ) + new_task->sched_group = sched_group_create(); +#endif + /* if inherit_memory is true, parent_task MUST not be NULL */ if (inherit_memory) new_task->map = vm_map_fork(ledger, parent_task->map); @@ -639,6 +777,13 @@ task_create_internal( new_task->t_flags = 0; new_task->importance = 0; +#if CONFIG_ATM + new_task->atm_context = NULL; +#endif +#if CONFIG_BANK + new_task->bank_context = NULL; +#endif + zinfo_task_init(new_task); #ifdef MACH_BSD @@ -666,11 +811,6 @@ task_create_internal( queue_init(&new_task->semaphore_list); new_task->semaphores_owned = 0; -#if CONFIG_MACF_MACH - new_task->label = labelh_new(1); - mac_task_label_init (&new_task->maclabel); -#endif - ipc_task_init(new_task, parent_task); new_task->total_user_time = 0; @@ -698,6 +838,10 @@ task_create_internal( new_task->suspends_outstanding = 0; #endif +#if HYPERVISOR + 
new_task->hv_task_target = NULL; +#endif /* HYPERVISOR */ + new_task->low_mem_notified_warn = 0; new_task->low_mem_notified_critical = 0; @@ -705,11 +849,7 @@ task_create_internal( new_task->purged_memory_critical = 0; new_task->mem_notify_reserved = 0; #if IMPORTANCE_INHERITANCE - new_task->imp_receiver = 0; - new_task->imp_donor = 0; - new_task->imp_reserved = 0; - new_task->task_imp_assertcnt = 0; - new_task->task_imp_externcnt = 0; + new_task->task_imp_base = NULL; #endif /* IMPORTANCE_INHERITANCE */ #if defined(__x86_64__) @@ -743,11 +883,36 @@ task_create_internal( new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task); #if IMPORTANCE_INHERITANCE - new_task->imp_donor = parent_task->imp_donor; + ipc_importance_task_t new_task_imp = IIT_NULL; + + if (task_is_marked_importance_donor(parent_task)) { + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != new_task_imp); + ipc_importance_task_mark_donor(new_task_imp, TRUE); + } /* Embedded doesn't want this to inherit */ - new_task->imp_receiver = parent_task->imp_receiver; + if (task_is_marked_importance_receiver(parent_task)) { + if (IIT_NULL == new_task_imp) + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != new_task_imp); + ipc_importance_task_mark_receiver(new_task_imp, TRUE); + } + if (task_is_marked_importance_denap_receiver(parent_task)) { + if (IIT_NULL == new_task_imp) + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != new_task_imp); + ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE); + } + + if (IIT_NULL != new_task_imp) { + assert(new_task->task_imp_base == new_task_imp); + ipc_importance_task_release(new_task_imp); + } #endif /* IMPORTANCE_INHERITANCE */ + new_task->priority = BASEPRI_DEFAULT; + new_task->max_priority = MAXPRI_USER; + new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype; new_task->requested_policy.int_darwinbg = 
parent_task->requested_policy.int_darwinbg; @@ -758,6 +923,7 @@ task_create_internal( new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive; new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier; new_task->requested_policy.terminated = parent_task->requested_policy.terminated; + new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp; task_policy_create(new_task, parent_task->requested_policy.t_boosted); } else { @@ -771,21 +937,42 @@ task_create_internal( new_task->all_image_info_size = (mach_vm_size_t)0; new_task->pset_hint = PROCESSOR_SET_NULL; + + if (kernel_task == TASK_NULL) { + new_task->priority = BASEPRI_KERNEL; + new_task->max_priority = MAXPRI_KERNEL; + } else { + new_task->priority = BASEPRI_DEFAULT; + new_task->max_priority = MAXPRI_USER; + } } - if (kernel_task == TASK_NULL) { - new_task->priority = BASEPRI_KERNEL; - new_task->max_priority = MAXPRI_KERNEL; - } else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) { - new_task->priority = MAXPRI_THROTTLE; - new_task->max_priority = MAXPRI_THROTTLE; + new_task->coalition = COALITION_NULL; + +#if CONFIG_COALITIONS + if (parent_coalition) { + coalition_adopt_task(parent_coalition, new_task); + } else if (parent_task && parent_task->coalition) { + coalition_adopt_task(parent_task->coalition, new_task); } else { - new_task->priority = BASEPRI_DEFAULT; - new_task->max_priority = MAXPRI_USER; + coalition_default_adopt_task(new_task); + } + + if (new_task->coalition == COALITION_NULL) { + panic("created task is not a member of any coalition"); } +#endif /* CONFIG_COALITIONS */ + + /* Allocate I/O Statistics */ + new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info)); + assert(new_task->task_io_stats != NULL); + bzero(new_task->task_io_stats, sizeof(struct io_stat_info)); + + bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats)); 
bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics)); new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0; + new_task->task_gpu_ns = 0; lck_mtx_lock(&tasks_threads_lock); queue_enter(&tasks, new_task, task_t, tasks); tasks_count++; @@ -795,6 +982,9 @@ task_create_internal( new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV); new_task->task_volatile_objects = 0; + new_task->task_nonvolatile_objects = 0; + new_task->task_purgeable_disowning = FALSE; + new_task->task_purgeable_disowned = FALSE; ipc_task_enable(new_task); @@ -802,6 +992,8 @@ task_create_internal( return(KERN_SUCCESS); } +int task_dropped_imp_count = 0; + /* * task_deallocate: * @@ -812,18 +1004,57 @@ task_deallocate( task_t task) { ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups; + uint32_t refs; if (task == TASK_NULL) return; - if (task_deallocate_internal(task) > 0) + refs = task_deallocate_internal(task); + +#if IMPORTANCE_INHERITANCE + if (refs > 1) return; + + if (refs == 1) { + /* + * If last ref potentially comes from the task's importance, + * disconnect it. But more task refs may be added before + * that completes, so wait for the reference to go to zero + * naturally (it may happen on a recursive task_deallocate() + * from the ipc_importance_disconnect_task() call). 
+ */ + if (IIT_NULL != task->task_imp_base) + ipc_importance_disconnect_task(task); + return; + } +#else + if (refs > 0) + return; +#endif /* IMPORTANCE_INHERITANCE */ lck_mtx_lock(&tasks_threads_lock); queue_remove(&terminated_tasks, task, task_t, tasks); terminated_tasks_count--; lck_mtx_unlock(&tasks_threads_lock); + /* + * remove the reference on atm descriptor + */ + task_atm_reset(task); + +#if CONFIG_BANK + /* + * remove the reference on bank context + */ + if (task->bank_context != NULL) { + bank_task_destroy(task->bank_context); + task->bank_context = NULL; + } +#endif + + if (task->task_io_stats) + kfree(task->task_io_stats, sizeof(struct io_stat_info)); + /* * Give the machine dependent code a chance * to perform cleanup before ripping apart @@ -836,6 +1067,26 @@ task_deallocate( if (task->affinity_space) task_affinity_deallocate(task); +#if MACH_ASSERT + if (task->ledger != NULL && + task->map != NULL && + task->map->pmap != NULL && + task->map->pmap->ledger != NULL) { + assert(task->ledger == task->map->pmap->ledger); + } +#endif /* MACH_ASSERT */ + + vm_purgeable_disown(task); + assert(task->task_purgeable_disowned); + if (task->task_volatile_objects != 0 || + task->task_nonvolatile_objects != 0) { + panic("task_deallocate(%p): " + "volatile_objects=%d nonvolatile_objects=%d\n", + task, + task->task_volatile_objects, + task->task_nonvolatile_objects); + } + vm_map_deallocate(task->map); is_release(task->itk_space); @@ -844,6 +1095,10 @@ task_deallocate( ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups, &platform_idle_wakeups, &debit); +#if defined(CONFIG_SCHED_MULTIQ) + sched_group_destroy(task->sched_group); +#endif + /* Accumulate statistics for dead tasks */ lck_spin_lock(&dead_task_statistics_lock); dead_task_statistics.total_user_time += task->total_user_time; @@ -858,10 +1113,6 @@ task_deallocate( lck_spin_unlock(&dead_task_statistics_lock); lck_mtx_destroy(&task->lock, &task_lck_grp); -#if CONFIG_MACF_MACH - 
labelh_release(task->label); -#endif - if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit, &debit)) { OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc); @@ -879,15 +1130,14 @@ task_deallocate( btlog_remove_entries_for_element(task_ref_btlog, task); #endif - if (task->task_volatile_objects) { - /* - * This task still "owns" some volatile VM objects. - * Disown them now to avoid leaving them pointing back at - * an invalid task. - */ - vm_purgeable_disown(task); - assert(task->task_volatile_objects == 0); +#if CONFIG_COALITIONS + if (!task->coalition) { + panic("deallocating task was not a member of any coalition"); } + coalition_release(task->coalition); +#endif /* CONFIG_COALITIONS */ + + task->coalition = COALITION_NULL; zfree(task_zone, task); } @@ -936,6 +1186,30 @@ task_terminate( return (task_terminate_internal(task)); } +#if MACH_ASSERT +extern int proc_pid(struct proc *); +extern void proc_name_kdp(task_t t, char *buf, int size); +#endif /* MACH_ASSERT */ + +#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */ +static void +__unused task_partial_reap(task_t task, __unused int pid) +{ + unsigned int reclaimed_resident = 0; + unsigned int reclaimed_compressed = 0; + uint64_t task_page_count; + + task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64); + + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START), + pid, task_page_count, 0, 0, 0); + + vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed); + + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END), + pid, reclaimed_resident, reclaimed_compressed, 0, 0); +} + kern_return_t task_terminate_internal( task_t task) @@ -943,6 +1217,7 @@ task_terminate_internal( thread_t thread, self; task_t self_task; boolean_t interrupt_save; + int pid = 0; assert(task != kernel_task); @@ -979,14 +1254,6 @@ task_terminate_internal( return (KERN_FAILURE); } -#if MACH_ASSERT - if (task->suspends_outstanding 
!= 0) { - printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n", - proc_name_address(task->bsd_info), proc_pid(task->bsd_info), - task->suspends_outstanding); - } -#endif - if (self_task != task) task_unlock(self_task); @@ -1022,8 +1289,22 @@ task_terminate_internal( thread_terminate_internal(thread); } +#ifdef MACH_BSD + if (task->bsd_info != NULL) { + pid = proc_pid(task->bsd_info); + } +#endif /* MACH_BSD */ + task_unlock(task); + proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, + TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE); + + /* Early object reap phase */ + +// PR-17045188: Revisit implementation +// task_partial_reap(task, pid); + /* * Destroy all synchronizers owned by the task. @@ -1035,8 +1316,19 @@ task_terminate_internal( */ ipc_space_terminate(task->itk_space); - if (vm_map_has_4GB_pagezero(task->map)) - vm_map_clear_4GB_pagezero(task->map); +#if 00 + /* if some ledgers go negative on tear-down again... */ + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.phys_footprint); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.internal); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.internal_compressed); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.iokit_mapped); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.alternate_accounting); +#endif /* * If the current thread is a member of the task @@ -1054,6 +1346,22 @@ task_terminate_internal( /* release our shared region */ vm_shared_region_set(task, NULL); +#if MACH_ASSERT + /* + * Identify the pmap's process, in case the pmap ledgers drift + * and we have to report it. 
+ */ + char procname[17]; + if (task->bsd_info) { + pid = proc_pid(task->bsd_info); + proc_name_kdp(task, procname, sizeof (procname)); + } else { + pid = 0; + strlcpy(procname, "", sizeof (procname)); + } + pmap_set_process(task->map->pmap, pid, procname); +#endif /* MACH_ASSERT */ + lck_mtx_lock(&tasks_threads_lock); queue_remove(&tasks, task, task_t, tasks); queue_enter(&terminated_tasks, task, task_t, tasks); @@ -1067,6 +1375,19 @@ task_terminate_internal( */ thread_interrupt_level(interrupt_save); +#if KPERF + /* force the task to release all ctrs */ + if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS) + kpc_force_all_ctrs(task, 0); +#endif + +#if CONFIG_COALITIONS + /* + * Leave our coalition. (drop activation but not reference) + */ + coalition_remove_task(task); +#endif + /* * Get rid of the task active reference on itself. */ @@ -2114,6 +2435,24 @@ host_security_set_task_token( return(kr); } +kern_return_t +task_send_trace_memory( + task_t target_task, + __unused uint32_t pid, + __unused uint64_t uniqueid) +{ + kern_return_t kr = KERN_INVALID_ARGUMENT; + if (target_task == TASK_NULL) + return (KERN_INVALID_ARGUMENT); + +#if CONFIG_ATM + kr = atm_send_proc_inspect_notification(target_task, + pid, + uniqueid); + +#endif + return (kr); +} /* * This routine was added, pretty much exclusively, for registering the * RPC glue vector for in-kernel short circuited tasks. 
Rather than @@ -2133,6 +2472,25 @@ task_set_info( return(KERN_INVALID_ARGUMENT); switch (flavor) { + +#if CONFIG_ATM + case TASK_TRACE_MEMORY_INFO: + { + if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) + return (KERN_INVALID_ARGUMENT); + + assert(task_info_in != NULL); + task_trace_memory_info_t mem_info; + mem_info = (task_trace_memory_info_t) task_info_in; + kern_return_t kr = atm_register_trace_memory(task, + mem_info->user_memory_address, + mem_info->buffer_size, + mem_info->mailbox_array_size); + return kr; + break; + } + +#endif default: return (KERN_INVALID_ARGUMENT); } @@ -2650,7 +3008,18 @@ task_info( break; } - task_power_info_locked(task, (task_power_info_t)task_info_out); + task_power_info_locked(task, (task_power_info_t)task_info_out, NULL); + break; + } + + case TASK_POWER_INFO_V2: + { + if (*task_info_count < TASK_POWER_INFO_V2_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } + task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out; + task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy); break; } @@ -2748,6 +3117,49 @@ task_info( break; } + case TASK_WAIT_STATE_INFO: + { + /* + * Deprecated flavor. Currently allowing some results until all users + * stop calling it. The results may not be accurate. + */ + task_wait_state_info_t wait_state_info; + uint64_t total_sfi_ledger_val = 0; + + if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } + + wait_state_info = (task_wait_state_info_t) task_info_out; + + wait_state_info->total_wait_state_time = 0; + bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved)); + + int i, prev_lentry = -1; + int64_t val_credit, val_debit; + + for (i = 0; i < MAX_SFI_CLASS_ID; i++){ + val_credit =0; + /* + * checking with prev_lentry != entry ensures adjacent classes + * which share the same ledger do not add wait times twice. + * Note: Use ledger() call to get data for each individual sfi class. 
+ */ + if (prev_lentry != task_ledgers.sfi_wait_times[i] && + KERN_SUCCESS == ledger_get_entries(task->ledger, + task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) { + total_sfi_ledger_val += val_credit; + } + prev_lentry = task_ledgers.sfi_wait_times[i]; + } + + wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val; + *task_info_count = TASK_WAIT_STATE_INFO_COUNT; + + break; + } + default: error = KERN_INVALID_ARGUMENT; } @@ -2765,7 +3177,8 @@ task_info( void task_power_info_locked( task_t task, - task_power_info_t info) + task_power_info_t info, + gpu_energy_data_t ginfo) { thread_t thread; ledger_amount_t tmp; @@ -2783,6 +3196,10 @@ task_power_info_locked( info->total_user = task->total_user_time; info->total_system = task->total_system_time; + if (ginfo) { + ginfo->task_gpu_utilisation = task->task_gpu_ns; + } + queue_iterate(&task->threads, thread, thread_t, task_threads) { uint64_t tval; spl_t x; @@ -2807,11 +3224,43 @@ task_power_info_locked( info->total_user += tval; } + if (ginfo) { + ginfo->task_gpu_utilisation += ml_gpu_stat(thread); + } thread_unlock(thread); splx(x); } } +/* + * task_gpu_utilisation + * + * Returns the total gpu time used by all the threads of the task + * (both dead and alive) + */ +uint64_t +task_gpu_utilisation( + task_t task) +{ + uint64_t gpu_time = 0; + thread_t thread; + + task_lock(task); + gpu_time += task->task_gpu_ns; + + queue_iterate(&task->threads, thread, thread_t, task_threads) { + spl_t x; + x = splsched(); + thread_lock(thread); + gpu_time += ml_gpu_stat(thread); + thread_unlock(thread); + splx(x); + } + + task_unlock(task); + return gpu_time; +} + kern_return_t task_purgable_info( task_t task, @@ -3035,26 +3484,6 @@ task_set_policy( return(KERN_FAILURE); } -#if FAST_TAS -kern_return_t -task_set_ras_pc( - task_t task, - vm_offset_t pc, - vm_offset_t endpc) -{ - extern int fast_tas_debug; - - if (fast_tas_debug) { - printf("task 0x%x: setting fast_tas to [0x%x, 0x%x]\n", - task, pc, endpc); - } - 
task_lock(task); - task->fast_tas_base = pc; - task->fast_tas_end = endpc; - task_unlock(task); - return KERN_SUCCESS; -} -#else /* FAST_TAS */ kern_return_t task_set_ras_pc( __unused task_t task, @@ -3063,7 +3492,6 @@ task_set_ras_pc( { return KERN_FAILURE; } -#endif /* FAST_TAS */ void task_synchronizer_destroy_all(task_t task) @@ -3156,6 +3584,15 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb) #ifdef MACH_BSD pid = proc_selfpid(); + + if (pid == 1) { + /* + * Cannot have ReportCrash analyzing + * a suspended initproc. + */ + return; + } + if (task->bsd_info != NULL) procname = proc_name_address(current_task()->bsd_info); #endif @@ -3199,7 +3636,14 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb) EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY); EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK); EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb); + + /* + * Use the _internal_ variant so that no user-space + * process can resume our task from under us. 
+ */ + task_suspend_internal(task); exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); + task_resume_internal(task); } /* @@ -3208,7 +3652,9 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb) void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1) { - ledger_amount_t max_footprint_mb; + ledger_amount_t max_footprint, max_footprint_mb; + ledger_amount_t footprint_after_purge; + task_t task; if (warning == LEDGER_WARNING_DIPPED_BELOW) { /* @@ -3217,14 +3663,36 @@ task_footprint_exceeded(int warning, __unused const void *param0, __unused const return; } - ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb); - max_footprint_mb >>= 20; + task = current_task(); + + ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint); + max_footprint_mb = max_footprint >> 20; + + /* + * Try and purge all "volatile" memory in that task first. + */ + (void) task_purge_volatile_memory(task); + /* are we still over the limit ? */ + ledger_get_balance(task->ledger, + task_ledgers.phys_footprint, + &footprint_after_purge); + if ((!warning && + footprint_after_purge <= max_footprint) || + (warning && + footprint_after_purge <= ((max_footprint * + PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) { + /* all better now */ + ledger_reset_callback_state(task->ledger, + task_ledgers.phys_footprint); + return; + } + /* still over the limit after purging... */ /* * If this an actual violation (not a warning), * generate a non-fatal high watermark EXC_RESOURCE. 
*/ - if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) { + if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) { THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb); } @@ -3382,68 +3850,6 @@ task_findtid(task_t task, uint64_t tid) return(THREAD_NULL); } - -#if CONFIG_MACF_MACH -/* - * Protect 2 task labels against modification by adding a reference on - * both label handles. The locks do not actually have to be held while - * using the labels as only labels with one reference can be modified - * in place. - */ - -void -tasklabel_lock2( - task_t a, - task_t b) -{ - labelh_reference(a->label); - labelh_reference(b->label); -} - -void -tasklabel_unlock2( - task_t a, - task_t b) -{ - labelh_release(a->label); - labelh_release(b->label); -} - -void -mac_task_label_update_internal( - struct label *pl, - struct task *task) -{ - - tasklabel_lock(task); - task->label = labelh_modify(task->label); - mac_task_label_update(pl, &task->maclabel); - tasklabel_unlock(task); - ip_lock(task->itk_self); - mac_port_label_update_cred(pl, &task->itk_self->ip_label); - ip_unlock(task->itk_self); -} - -void -mac_task_label_modify( - struct task *task, - void *arg, - void (*f) (struct label *l, void *arg)) -{ - - tasklabel_lock(task); - task->label = labelh_modify(task->label); - (*f)(&task->maclabel, arg); - tasklabel_unlock(task); -} - -struct label * -mac_task_get_label(struct task *task) -{ - return (&task->maclabel); -} -#endif - /* * Control the CPU usage monitor for a task. 
*/ @@ -3624,3 +4030,91 @@ THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void) task_terminate_internal(task); } } + +kern_return_t +task_purge_volatile_memory( + task_t task) +{ + vm_map_t map; + int num_object_purged; + + if (task == TASK_NULL) + return KERN_INVALID_TASK; + + task_lock(task); + + if (!task->active) { + task_unlock(task); + return KERN_INVALID_TASK; + } + map = task->map; + if (map == VM_MAP_NULL) { + task_unlock(task); + return KERN_INVALID_TASK; + } + vm_map_reference(task->map); + + task_unlock(task); + + num_object_purged = vm_map_purge(map); + vm_map_deallocate(map); + + return KERN_SUCCESS; +} + +/* Placeholders for the task set/get voucher interfaces */ +kern_return_t +task_get_mach_voucher( + task_t task, + mach_voucher_selector_t __unused which, + ipc_voucher_t *voucher) +{ + if (TASK_NULL == task) + return KERN_INVALID_TASK; + + *voucher = NULL; + return KERN_SUCCESS; +} + +kern_return_t +task_set_mach_voucher( + task_t task, + ipc_voucher_t __unused voucher) +{ + if (TASK_NULL == task) + return KERN_INVALID_TASK; + + return KERN_SUCCESS; +} + +kern_return_t +task_swap_mach_voucher( + task_t task, + ipc_voucher_t new_voucher, + ipc_voucher_t *in_out_old_voucher) +{ + if (TASK_NULL == task) + return KERN_INVALID_TASK; + + *in_out_old_voucher = new_voucher; + return KERN_SUCCESS; +} + +void task_set_gpu_denied(task_t task, boolean_t denied) +{ + task_lock(task); + + if (denied) { + task->t_flags |= TF_GPU_DENIED; + } else { + task->t_flags &= ~TF_GPU_DENIED; + } + + task_unlock(task); +} + +boolean_t task_is_gpu_denied(task_t task) +{ + /* We don't need the lock to read this flag */ + return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE; +}