X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/a1c7dba18ef36983396c282fe85292db066e39db..b226f5e54a60dc81db17b1260381d7dbfea3cdf1:/osfmk/kern/coalition.c diff --git a/osfmk/kern/coalition.c b/osfmk/kern/coalition.c index 95babd632..26f9d33a4 100644 --- a/osfmk/kern/coalition.c +++ b/osfmk/kern/coalition.c @@ -32,12 +32,12 @@ #include #include -#include #include +#include #include /* for TASK_CHUNK */ #include +#include #include -#include #include @@ -47,6 +47,17 @@ #include +/* + * BSD interface functions + */ +int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz); +boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal); +task_t coalition_get_leader(coalition_t coal); +int coalition_get_task_count(coalition_t coal); +uint64_t coalition_get_page_count(coalition_t coal, int *ntasks); +int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order, + int *pid_list, int list_sz); + /* defined in task.c */ extern ledger_template_t task_ledger_template; @@ -58,6 +69,7 @@ extern ledger_template_t task_ledger_template; #define COALITION_CHUNK TASK_CHUNK int unrestrict_coalition_syscalls; +int merge_adaptive_coalitions; lck_attr_t coalitions_lck_attr; lck_grp_t coalitions_lck_grp; @@ -67,20 +79,118 @@ lck_grp_attr_t coalitions_lck_grp_attr; decl_lck_mtx_data(static,coalitions_list_lock); static uint64_t coalition_count; static uint64_t coalition_next_id = 1; -static queue_head_t coalitions; +static queue_head_t coalitions_q; -coalition_t default_coalition; +coalition_t init_coalition[COALITION_NUM_TYPES]; +coalition_t corpse_coalition[COALITION_NUM_TYPES]; zone_t coalition_zone; -struct coalition { - uint64_t id; /* monotonically increasing */ +static const char *coal_type_str(int type) +{ + switch(type) { + case COALITION_TYPE_RESOURCE: + return "RESOURCE"; + case COALITION_TYPE_JETSAM: + return "JETSAM"; + default: + return ""; + } +} + +struct coalition_type { + int type; + int has_default; + /* + * init + * pre-condition: coalition just allocated (unlocked), unreferenced, + * type field set + */ + kern_return_t (*init)(coalition_t coal, boolean_t privileged); + + /* + * dealloc + * pre-condition: coalition unlocked + * pre-condition: coalition refcount=0, active_count=0, + * termrequested=1, terminated=1, reaped=1 + */ + void (*dealloc)(coalition_t coal); + + /* + * adopt_task + * pre-condition: coalition locked + * pre-condition: coalition !repead and !terminated + */ + kern_return_t (*adopt_task)(coalition_t coal, task_t task); + + /* + * remove_task + * pre-condition: coalition locked + * pre-condition: task has been removed from coalition's task list + */ + kern_return_t (*remove_task)(coalition_t coal, task_t task); + /* + * set_taskrole + * pre-condition: coalition locked + * pre-condition: task added to coalition's task list, + * active_count >= 1 (at least the given task is active) + */ + kern_return_t (*set_taskrole)(coalition_t coal, task_t task, int role); + + /* + * get_taskrole + * pre-condition: coalition locked + * pre-condition: task added to coalition's task list, + * active_count >= 1 (at least the given task is active) + */ + int (*get_taskrole)(coalition_t coal, task_t task); + + /* + * iterate_tasks + * pre-condition: coalition locked + */ + void (*iterate_tasks)(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t)); +}; + +/* + * COALITION_TYPE_RESOURCE + */ + +static kern_return_t i_coal_resource_init(coalition_t coal, boolean_t privileged); +static void i_coal_resource_dealloc(coalition_t coal); +static kern_return_t i_coal_resource_adopt_task(coalition_t coal, task_t task); +static kern_return_t i_coal_resource_remove_task(coalition_t coal, task_t task); +static kern_return_t i_coal_resource_set_taskrole(coalition_t coal, + task_t task, int role); +static int i_coal_resource_get_taskrole(coalition_t coal, task_t task); +static void i_coal_resource_iterate_tasks(coalition_t coal, void *ctx, + void (*callback)(coalition_t, void *, task_t)); + +/* + * Ensure COALITION_NUM_THREAD_QOS_TYPES defined in mach/coalition.h still + * matches THREAD_QOS_LAST defined in mach/thread_policy.h + */ +static_assert(COALITION_NUM_THREAD_QOS_TYPES == THREAD_QOS_LAST); + +struct i_resource_coalition { ledger_t ledger; uint64_t bytesread; uint64_t byteswritten; + uint64_t energy; uint64_t gpu_time; - + uint64_t logical_immediate_writes; + uint64_t logical_deferred_writes; + uint64_t logical_invalidated_writes; + uint64_t logical_metadata_writes; + uint64_t cpu_ptime; + uint64_t cpu_time_eqos[COALITION_NUM_THREAD_QOS_TYPES]; /* cpu time per effective QoS class */ + uint64_t cpu_time_rqos[COALITION_NUM_THREAD_QOS_TYPES]; /* cpu time per requested QoS class */ + + uint64_t task_count; /* tasks that have started in this coalition */ + uint64_t dead_task_count; /* tasks that have exited in this coalition; + subtract from task_count to get count + of "active" tasks */ /* * Count the length of time this coalition had at least one active task. * This can be a 'denominator' to turn e.g. cpu_time to %cpu. @@ -88,36 +198,123 @@ struct coalition { uint64_t last_became_nonempty_time; uint64_t time_nonempty; - uint64_t task_count; /* Count of tasks that have started in this coalition */ - uint64_t dead_task_count; /* Count of tasks that have exited in this coalition; subtract from task_count to get count of "active" */ - queue_head_t tasks; /* List of active tasks in the coalition */ - - queue_chain_t coalitions; /* global list of coalitions */ + queue_head_t tasks; /* List of active tasks in the coalition */ +}; - decl_lck_mtx_data(,lock) /* Coalition lock. */ +/* + * COALITION_TYPE_JETSAM + */ - uint32_t ref_count; /* Number of references to the memory containing this struct */ - uint32_t active_count; /* Number of members of (tasks in) the coalition, plus vouchers referring to the coalition */ +static kern_return_t i_coal_jetsam_init(coalition_t coal, boolean_t privileged); +static void i_coal_jetsam_dealloc(coalition_t coal); +static kern_return_t i_coal_jetsam_adopt_task(coalition_t coal, task_t task); +static kern_return_t i_coal_jetsam_remove_task(coalition_t coal, task_t task); +static kern_return_t i_coal_jetsam_set_taskrole(coalition_t coal, + task_t task, int role); +static int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task); +static void i_coal_jetsam_iterate_tasks(coalition_t coal, void *ctx, + void (*callback)(coalition_t, void *, task_t)); + +struct i_jetsam_coalition { + task_t leader; + queue_head_t extensions; + queue_head_t services; + queue_head_t other; + struct thread_group *thread_group; +}; - unsigned int privileged : 1; /* Members of this coalition may create and manage coalitions and may posix_spawn processes into selected coalitions */ +/* + * main coalition structure + */ +struct coalition { + uint64_t id; /* monotonically increasing */ + uint32_t type; + uint32_t role; /* default task role (background, adaptive, interactive, etc) */ + uint32_t ref_count; /* Number of references to the memory containing this struct */ + uint32_t active_count; /* Number of members of (tasks in) the + coalition, plus vouchers referring + to the coalition */ + uint32_t focal_task_count; /* Number of TASK_FOREGROUND_APPLICATION tasks in the coalition */ + uint32_t nonfocal_task_count; /* Number of TASK_BACKGROUND_APPLICATION tasks in the coalition */ + + /* coalition flags */ + uint32_t privileged : 1; /* Members of this coalition may create + and manage coalitions and may posix_spawn + processes into selected coalitions */ /* ast? */ - /* voucher */ + uint32_t termrequested : 1; /* launchd has requested termination when coalition becomes empty */ + uint32_t terminated : 1; /* coalition became empty and spawns are now forbidden */ + uint32_t reaped : 1; /* reaped, invisible to userspace, but waiting for ref_count to go to zero */ + uint32_t notified : 1; /* no-more-processes notification was sent via special port */ + uint32_t efficient : 1; /* launchd has marked the coalition as efficient */ +#if DEVELOPMENT || DEBUG + uint32_t should_notify : 1; /* should this coalition send notifications (default: yes) */ +#endif + + queue_chain_t coalitions; /* global list of coalitions */ - /* state of the coalition */ - unsigned int termrequested : 1; /* launchd has requested termination when coalition becomes empty */ - unsigned int terminated : 1; /* coalition became empty and spawns are now forbidden */ - unsigned int reaped : 1; /* reaped, invisible to userspace, but waiting for ref_count to go to zero */ - unsigned int notified : 1; /* no-more-processes notification was sent via special port */ + decl_lck_mtx_data(,lock) /* Coalition lock. */ - uint32_t focal_tasks_count; /* count of TASK_FOREGROUND_APPLICATION tasks in the coalition */ - uint32_t non_focal_tasks_count; /* count of TASK_BACKGROUND_APPLICATION tasks in the coalition */ + /* put coalition type-specific structures here */ + union { + struct i_resource_coalition r; + struct i_jetsam_coalition j; + }; }; +/* + * register different coalition types: + * these must be kept in the order specified in coalition.h + */ +static const struct coalition_type +s_coalition_types[COALITION_NUM_TYPES] = { + { + COALITION_TYPE_RESOURCE, + 1, + i_coal_resource_init, + i_coal_resource_dealloc, + i_coal_resource_adopt_task, + i_coal_resource_remove_task, + i_coal_resource_set_taskrole, + i_coal_resource_get_taskrole, + i_coal_resource_iterate_tasks, + }, + { + COALITION_TYPE_JETSAM, + 1, + i_coal_jetsam_init, + i_coal_jetsam_dealloc, + i_coal_jetsam_adopt_task, + i_coal_jetsam_remove_task, + i_coal_jetsam_set_taskrole, + i_coal_jetsam_get_taskrole, + i_coal_jetsam_iterate_tasks, + }, +}; + +#define coal_call(coal, func, ...) \ + (s_coalition_types[(coal)->type].func)(coal, ## __VA_ARGS__) + + #define coalition_lock(c) do{ lck_mtx_lock(&c->lock); }while(0) #define coalition_unlock(c) do{ lck_mtx_unlock(&c->lock); }while(0) +/* + * Define the coalition type to track focal tasks. + * On embedded, track them using jetsam coalitions since they have associated thread + * groups which reflect this property as a flag (and pass it down to CLPC). + * On non-embedded platforms, since not all coalitions have jetsam coalitions + * track focal counts on the resource coalition. + */ +#if CONFIG_EMBEDDED +#define COALITION_FOCAL_TASKS_ACCOUNTING COALITION_TYPE_JETSAM +#else /* CONFIG_EMBEDDED */ +#define COALITION_FOCAL_TASKS_ACCOUNTING COALITION_TYPE_RESOURCE +#endif /* CONFIG_EMBEDDED */ + + static void coalition_notify_user(uint64_t id, uint32_t flags) { @@ -130,29 +327,154 @@ coalition_notify_user(uint64_t id, uint32_t flags) } coalition_notification(user_port, id, flags); + ipc_port_release_send(user_port); } /* - * coalition_find_by_id_internal - * Returns: Coalition object with specified id, NOT referenced. - * If not found, returns COALITION_NULL. - * Condition: coalitions_list_lock must be LOCKED. + * + * COALITION_TYPE_RESOURCE + * */ -static coalition_t -coalition_find_by_id_internal(uint64_t coal_id) +static kern_return_t +i_coal_resource_init(coalition_t coal, boolean_t privileged) { - if (coal_id == 0) { - return COALITION_NULL; + (void)privileged; + assert(coal && coal->type == COALITION_TYPE_RESOURCE); + coal->r.ledger = ledger_instantiate(task_ledger_template, + LEDGER_CREATE_ACTIVE_ENTRIES); + if (coal->r.ledger == NULL) + return KERN_RESOURCE_SHORTAGE; + + queue_init(&coal->r.tasks); + + return KERN_SUCCESS; +} + +static void +i_coal_resource_dealloc(coalition_t coal) +{ + assert(coal && coal->type == COALITION_TYPE_RESOURCE); + ledger_dereference(coal->r.ledger); +} + +static kern_return_t +i_coal_resource_adopt_task(coalition_t coal, task_t task) +{ + struct i_resource_coalition *cr; + + assert(coal && coal->type == COALITION_TYPE_RESOURCE); + assert(queue_empty(&task->task_coalition[COALITION_TYPE_RESOURCE])); + + cr = &coal->r; + cr->task_count++; + + if (cr->task_count < cr->dead_task_count) { + panic("%s: coalition %p id:%llu type:%s task_count(%llu) < dead_task_count(%llu)", + __func__, coal, coal->id, coal_type_str(coal->type), + cr->task_count, cr->dead_task_count); } - lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED); - coalition_t coal; - queue_iterate(&coalitions, coal, coalition_t, coalitions) { - if (coal->id == coal_id) { - return coal; - } + /* If moving from 0->1 active tasks */ + if (cr->task_count - cr->dead_task_count == 1) { + cr->last_became_nonempty_time = mach_absolute_time(); } - return COALITION_NULL; + + /* put the task on the coalition's list of tasks */ + enqueue_tail(&cr->tasks, &task->task_coalition[COALITION_TYPE_RESOURCE]); + + coal_dbg("Added PID:%d to id:%llu, task_count:%llu, dead_count:%llu, nonempty_time:%llu", + task_pid(task), coal->id, cr->task_count, cr->dead_task_count, + cr->last_became_nonempty_time); + + return KERN_SUCCESS; +} + +static kern_return_t +i_coal_resource_remove_task(coalition_t coal, task_t task) +{ + struct i_resource_coalition *cr; + + assert(coal && coal->type == COALITION_TYPE_RESOURCE); + assert(task->coalition[COALITION_TYPE_RESOURCE] == coal); + assert(!queue_empty(&task->task_coalition[COALITION_TYPE_RESOURCE])); + + /* + * handle resource coalition accounting rollup for dead tasks + */ + cr = &coal->r; + + cr->dead_task_count++; + + if (cr->task_count < cr->dead_task_count) { + panic("%s: coalition %p id:%llu type:%s task_count(%llu) < dead_task_count(%llu)", + __func__, coal, coal->id, coal_type_str(coal->type), cr->task_count, cr->dead_task_count); + } + + /* If moving from 1->0 active tasks */ + if (cr->task_count - cr->dead_task_count == 0) { + uint64_t last_time_nonempty = mach_absolute_time() - cr->last_became_nonempty_time; + cr->last_became_nonempty_time = 0; + cr->time_nonempty += last_time_nonempty; + } + + /* Do not roll up for exec'd task or exec copy task */ + if (!task_is_exec_copy(task) && !task_did_exec(task)) { + ledger_rollup(cr->ledger, task->ledger); + cr->bytesread += task->task_io_stats->disk_reads.size; + cr->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; +#if !CONFIG_EMBEDDED + cr->gpu_time += task_gpu_utilisation(task); +#else + cr->energy += task_energy(task); +#endif + cr->logical_immediate_writes += task->task_immediate_writes; + cr->logical_deferred_writes += task->task_deferred_writes; + cr->logical_invalidated_writes += task->task_invalidated_writes; + cr->logical_metadata_writes += task->task_metadata_writes; + cr->cpu_ptime += task_cpu_ptime(task); + task_update_cpu_time_qos_stats(task, cr->cpu_time_eqos, cr->cpu_time_rqos); + } + + /* remove the task from the coalition's list */ + remqueue(&task->task_coalition[COALITION_TYPE_RESOURCE]); + queue_chain_init(task->task_coalition[COALITION_TYPE_RESOURCE]); + + coal_dbg("removed PID:%d from id:%llu, task_count:%llu, dead_count:%llu", + task_pid(task), coal->id, cr->task_count, cr->dead_task_count); + + return KERN_SUCCESS; +} + +static kern_return_t +i_coal_resource_set_taskrole(__unused coalition_t coal, + __unused task_t task, __unused int role) +{ + return KERN_SUCCESS; +} + +static int +i_coal_resource_get_taskrole(__unused coalition_t coal, __unused task_t task) +{ + task_t t; + + assert(coal && coal->type == COALITION_TYPE_RESOURCE); + + qe_foreach_element(t, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) { + if (t == task) + return COALITION_TASKROLE_UNDEF; + } + + return -1; +} + +static void +i_coal_resource_iterate_tasks(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t)) +{ + task_t t; + assert(coal && coal->type == COALITION_TYPE_RESOURCE); + + qe_foreach_element(t, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) + callback(coal, ctx, t); } kern_return_t @@ -160,11 +482,21 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us { kern_return_t kr; ledger_amount_t credit, debit; + int i; + + if (coal->type != COALITION_TYPE_RESOURCE) + return KERN_INVALID_ARGUMENT; + + /* Return KERN_INVALID_ARGUMENT for Corpse coalition */ + for (i = 0; i < COALITION_NUM_TYPES; i++) { + if (coal == corpse_coalition[i]) { + return KERN_INVALID_ARGUMENT; + } + } ledger_t sum_ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES); - if (sum_ledger == LEDGER_NULL) { + if (sum_ledger == LEDGER_NULL) return KERN_RESOURCE_SHORTAGE; - } coalition_lock(coal); @@ -172,29 +504,80 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us * Start with the coalition's ledger, which holds the totals from all * the dead tasks. */ - ledger_rollup(sum_ledger, coal->ledger); - uint64_t bytesread = coal->bytesread; - uint64_t byteswritten = coal->byteswritten; - uint64_t gpu_time = coal->gpu_time; - + ledger_rollup(sum_ledger, coal->r.ledger); + uint64_t bytesread = coal->r.bytesread; + uint64_t byteswritten = coal->r.byteswritten; + uint64_t gpu_time = coal->r.gpu_time; + uint64_t energy = coal->r.energy; + uint64_t logical_immediate_writes = coal->r.logical_immediate_writes; + uint64_t logical_deferred_writes = coal->r.logical_deferred_writes; + uint64_t logical_invalidated_writes = coal->r.logical_invalidated_writes; + uint64_t logical_metadata_writes = coal->r.logical_metadata_writes; + int64_t cpu_time_billed_to_me = 0; + int64_t cpu_time_billed_to_others = 0; + int64_t energy_billed_to_me = 0; + int64_t energy_billed_to_others = 0; + uint64_t cpu_ptime = coal->r.cpu_ptime; + uint64_t cpu_time_eqos[COALITION_NUM_THREAD_QOS_TYPES]; + memcpy(cpu_time_eqos, coal->r.cpu_time_eqos, sizeof(cpu_time_eqos)); + uint64_t cpu_time_rqos[COALITION_NUM_THREAD_QOS_TYPES]; + memcpy(cpu_time_rqos, coal->r.cpu_time_rqos, sizeof(cpu_time_rqos)); /* * Add to that all the active tasks' ledgers. Tasks cannot deallocate * out from under us, since we hold the coalition lock. */ task_t task; - queue_iterate(&coal->tasks, task, task_t, coalition_tasks) { + qe_foreach_element(task, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) { + /* + * Rolling up stats for exec copy task or exec'd task will lead to double accounting. + * Cannot take task lock after taking coaliton lock + */ + if (task_is_exec_copy(task) || task_did_exec(task)) { + continue; + } + ledger_rollup(sum_ledger, task->ledger); bytesread += task->task_io_stats->disk_reads.size; byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; +#if !CONFIG_EMBEDDED gpu_time += task_gpu_utilisation(task); +#else + energy += task_energy(task); +#endif + logical_immediate_writes += task->task_immediate_writes; + logical_deferred_writes += task->task_deferred_writes; + logical_invalidated_writes += task->task_invalidated_writes; + logical_metadata_writes += task->task_metadata_writes; + cpu_ptime += task_cpu_ptime(task); + task_update_cpu_time_qos_stats(task, cpu_time_eqos, cpu_time_rqos); + } + + kr = ledger_get_balance(sum_ledger, task_ledgers.cpu_time_billed_to_me, (int64_t *)&cpu_time_billed_to_me); + if (kr != KERN_SUCCESS || cpu_time_billed_to_me < 0) { + cpu_time_billed_to_me = 0; + } + + kr = ledger_get_balance(sum_ledger, task_ledgers.cpu_time_billed_to_others, (int64_t *)&cpu_time_billed_to_others); + if (kr != KERN_SUCCESS || cpu_time_billed_to_others < 0) { + cpu_time_billed_to_others = 0; + } + + kr = ledger_get_balance(sum_ledger, task_ledgers.energy_billed_to_me, (int64_t *)&energy_billed_to_me); + if (kr != KERN_SUCCESS || energy_billed_to_me < 0) { + energy_billed_to_me = 0; + } + + kr = ledger_get_balance(sum_ledger, task_ledgers.energy_billed_to_others, (int64_t *)&energy_billed_to_others); + if (kr != KERN_SUCCESS || energy_billed_to_others < 0) { + energy_billed_to_others = 0; } /* collect information from the coalition itself */ - cru_out->tasks_started = coal->task_count; - cru_out->tasks_exited = coal->dead_task_count; + cru_out->tasks_started = coal->r.task_count; + cru_out->tasks_exited = coal->r.dead_task_count; - uint64_t time_nonempty = coal->time_nonempty; - uint64_t last_became_nonempty_time = coal->last_became_nonempty_time; + uint64_t time_nonempty = coal->r.time_nonempty; + uint64_t last_became_nonempty_time = coal->r.last_became_nonempty_time; coalition_unlock(coal); @@ -205,6 +588,10 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us credit = 0; } cru_out->cpu_time = credit; + cru_out->cpu_time_billed_to_me = (uint64_t)cpu_time_billed_to_me; + cru_out->cpu_time_billed_to_others = (uint64_t)cpu_time_billed_to_others; + cru_out->energy_billed_to_me = (uint64_t)energy_billed_to_me; + cru_out->energy_billed_to_others = (uint64_t)energy_billed_to_others; kr = ledger_get_entries(sum_ledger, task_ledgers.interrupt_wakeups, &credit, &debit); @@ -223,7 +610,14 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us cru_out->bytesread = bytesread; cru_out->byteswritten = byteswritten; cru_out->gpu_time = gpu_time; - + cru_out->energy = energy; + cru_out->logical_immediate_writes = logical_immediate_writes; + cru_out->logical_deferred_writes = logical_deferred_writes; + cru_out->logical_invalidated_writes = logical_invalidated_writes; + cru_out->logical_metadata_writes = logical_metadata_writes; + cru_out->cpu_ptime = cpu_ptime; + cru_out->cpu_time_eqos_len = COALITION_NUM_THREAD_QOS_TYPES; + memcpy(cru_out->cpu_time_eqos, cpu_time_eqos, sizeof(cru_out->cpu_time_eqos)); ledger_dereference(sum_ledger); sum_ledger = LEDGER_NULL; @@ -235,43 +629,259 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us return KERN_SUCCESS; } +/* + * + * COALITION_TYPE_JETSAM + * + */ +static kern_return_t +i_coal_jetsam_init(coalition_t coal, boolean_t privileged) +{ + assert(coal && coal->type == COALITION_TYPE_JETSAM); + (void)privileged; + + coal->j.leader= TASK_NULL; + queue_head_init(coal->j.extensions); + queue_head_init(coal->j.services); + queue_head_init(coal->j.other); + + return KERN_SUCCESS; +} + +static void +i_coal_jetsam_dealloc(__unused coalition_t coal) +{ + assert(coal && coal->type == COALITION_TYPE_JETSAM); + + /* the coalition should be completely clear at this point */ + assert(queue_empty(&coal->j.extensions)); + assert(queue_empty(&coal->j.services)); + assert(queue_empty(&coal->j.other)); + assert(coal->j.leader == TASK_NULL); + +} + +static kern_return_t +i_coal_jetsam_adopt_task(coalition_t coal, task_t task) +{ + struct i_jetsam_coalition *cj; + assert(coal && coal->type == COALITION_TYPE_JETSAM); + + cj = &coal->j; + + assert(queue_empty(&task->task_coalition[COALITION_TYPE_JETSAM])); + + /* put each task initially in the "other" list */ + enqueue_tail(&cj->other, &task->task_coalition[COALITION_TYPE_JETSAM]); + coal_dbg("coalition %lld adopted PID:%d as UNDEF", + coal->id, task_pid(task)); + + return KERN_SUCCESS; +} + +static kern_return_t +i_coal_jetsam_remove_task(coalition_t coal, task_t task) +{ + assert(coal && coal->type == COALITION_TYPE_JETSAM); + assert(task->coalition[COALITION_TYPE_JETSAM] == coal); + + coal_dbg("removing PID:%d from coalition id:%lld", + task_pid(task), coal->id); + + if (task == coal->j.leader) { + coal->j.leader = NULL; + coal_dbg(" PID:%d was the leader!", task_pid(task)); + } else { + assert(!queue_empty(&task->task_coalition[COALITION_TYPE_JETSAM])); + } + + /* remove the task from the specific coalition role queue */ + remqueue(&task->task_coalition[COALITION_TYPE_JETSAM]); + queue_chain_init(task->task_coalition[COALITION_TYPE_RESOURCE]); + + return KERN_SUCCESS; +} + +static kern_return_t +i_coal_jetsam_set_taskrole(coalition_t coal, task_t task, int role) +{ + struct i_jetsam_coalition *cj; + queue_t q = NULL; + assert(coal && coal->type == COALITION_TYPE_JETSAM); + assert(task->coalition[COALITION_TYPE_JETSAM] == coal); + + cj = &coal->j; + + switch (role) { + case COALITION_TASKROLE_LEADER: + coal_dbg("setting PID:%d as LEADER of %lld", + task_pid(task), coal->id); + if (cj->leader != TASK_NULL) { + /* re-queue the exiting leader onto the "other" list */ + coal_dbg(" re-queue existing leader (%d) as OTHER", + task_pid(cj->leader)); + re_queue_tail(&cj->other, &cj->leader->task_coalition[COALITION_TYPE_JETSAM]); + } + /* + * remove the task from the "other" list + * (where it was put by default) + */ + remqueue(&task->task_coalition[COALITION_TYPE_JETSAM]); + queue_chain_init(task->task_coalition[COALITION_TYPE_JETSAM]); + + /* set the coalition leader */ + cj->leader = task; + break; + case COALITION_TASKROLE_XPC: + coal_dbg("setting PID:%d as XPC in %lld", + task_pid(task), coal->id); + q = (queue_t)&cj->services; + break; + case COALITION_TASKROLE_EXT: + coal_dbg("setting PID:%d as EXT in %lld", + task_pid(task), coal->id); + q = (queue_t)&cj->extensions; + break; + case COALITION_TASKROLE_NONE: + /* + * Tasks with a role of "none" should fall through to an + * undefined role so long as the task is currently a member + * of the coalition. This scenario can happen if a task is + * killed (usually via jetsam) during exec. + */ + if (task->coalition[COALITION_TYPE_JETSAM] != coal) { + panic("%s: task %p attempting to set role %d " + "in coalition %p to which it does not belong!", __func__, task, role, coal); + } + /* fall through */ + case COALITION_TASKROLE_UNDEF: + coal_dbg("setting PID:%d as UNDEF in %lld", + task_pid(task), coal->id); + q = (queue_t)&cj->other; + break; + default: + panic("%s: invalid role(%d) for task", __func__, role); + return KERN_INVALID_ARGUMENT; + } + + if (q != NULL) + re_queue_tail(q, &task->task_coalition[COALITION_TYPE_JETSAM]); + + return KERN_SUCCESS; +} + +static int +i_coal_jetsam_get_taskrole(coalition_t coal, task_t task) +{ + struct i_jetsam_coalition *cj; + task_t t; + + assert(coal && coal->type == COALITION_TYPE_JETSAM); + assert(task->coalition[COALITION_TYPE_JETSAM] == coal); + + cj = &coal->j; + + if (task == cj->leader) + return COALITION_TASKROLE_LEADER; + + qe_foreach_element(t, &cj->services, task_coalition[COALITION_TYPE_JETSAM]) { + if (t == task) + return COALITION_TASKROLE_XPC; + } + + qe_foreach_element(t, &cj->extensions, task_coalition[COALITION_TYPE_JETSAM]) { + if (t == task) + return COALITION_TASKROLE_EXT; + } + + qe_foreach_element(t, &cj->other, task_coalition[COALITION_TYPE_JETSAM]) { + if (t == task) + return COALITION_TASKROLE_UNDEF; + } + + /* task not in the coalition?! */ + return COALITION_TASKROLE_NONE; +} + +static void +i_coal_jetsam_iterate_tasks(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t)) +{ + struct i_jetsam_coalition *cj; + task_t t; + + assert(coal && coal->type == COALITION_TYPE_JETSAM); + + cj = &coal->j; + + if (cj->leader) + callback(coal, ctx, cj->leader); + + qe_foreach_element(t, &cj->services, task_coalition[COALITION_TYPE_JETSAM]) + callback(coal, ctx, t); + + qe_foreach_element(t, &cj->extensions, task_coalition[COALITION_TYPE_JETSAM]) + callback(coal, ctx, t); + + qe_foreach_element(t, &cj->other, task_coalition[COALITION_TYPE_JETSAM]) + callback(coal, ctx, t); +} + + +/* + * + * Main Coalition implementation + * + */ + /* * coalition_create_internal * Returns: New coalition object, referenced for the caller and unlocked. * Condition: coalitions_list_lock must be UNLOCKED. */ kern_return_t -coalition_create_internal(coalition_t *out, boolean_t privileged) +coalition_create_internal(int type, int role, boolean_t privileged, coalition_t *out) { - struct coalition *new_coal = (struct coalition *)zalloc(coalition_zone); - if (new_coal == COALITION_NULL) { + kern_return_t kr; + struct coalition *new_coal; + + if (type < 0 || type > COALITION_TYPE_MAX) + return KERN_INVALID_ARGUMENT; + + new_coal = (struct coalition *)zalloc(coalition_zone); + if (new_coal == COALITION_NULL) return KERN_RESOURCE_SHORTAGE; - } bzero(new_coal, sizeof(*new_coal)); - new_coal->ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES); - if (new_coal->ledger == NULL) { + new_coal->type = type; + new_coal->role = role; + + /* initialize type-specific resources */ + kr = coal_call(new_coal, init, privileged); + if (kr != KERN_SUCCESS) { zfree(coalition_zone, new_coal); - return KERN_RESOURCE_SHORTAGE; + return kr; } /* One for caller, one for coalitions list */ new_coal->ref_count = 2; new_coal->privileged = privileged ? TRUE : FALSE; +#if DEVELOPMENT || DEBUG + new_coal->should_notify = 1; +#endif lck_mtx_init(&new_coal->lock, &coalitions_lck_grp, &coalitions_lck_attr); - queue_init(&new_coal->tasks); lck_mtx_lock(&coalitions_list_lock); new_coal->id = coalition_next_id++; coalition_count++; - queue_enter(&coalitions, new_coal, coalition_t, coalitions); + enqueue_tail(&coalitions_q, &new_coal->coalitions); + + KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_NEW), + new_coal->id, new_coal->type); lck_mtx_unlock(&coalitions_list_lock); -#if COALITION_DEBUG - printf("%s: new coal id %llu\n", __func__, new_coal->id); -#endif + coal_dbg("id:%llu, type:%s", new_coal->id, coal_type_str(new_coal->type)); *out = new_coal; return KERN_SUCCESS; @@ -284,37 +894,63 @@ coalition_create_internal(coalition_t *out, boolean_t privileged) void coalition_release(coalition_t coal) { - boolean_t do_dealloc = FALSE; - /* TODO: This can be done with atomics. */ coalition_lock(coal); coal->ref_count--; - if (coal->ref_count == 0) { - do_dealloc = TRUE; - } + #if COALITION_DEBUG uint32_t rc = coal->ref_count; + uint32_t ac = coal->active_count; #endif /* COALITION_DEBUG */ + coal_dbg("id:%llu type:%s ref_count:%u active_count:%u%s", + coal->id, coal_type_str(coal->type), rc, ac, + rc <= 0 ? ", will deallocate now" : ""); + + if (coal->ref_count > 0) { + coalition_unlock(coal); + return; + } + + assert(coal->termrequested); + assert(coal->terminated); + assert(coal->active_count == 0); + assert(coal->reaped); + assert(coal->focal_task_count == 0); + assert(coal->nonfocal_task_count == 0); + KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_FREE), + coal->id, coal->type); + + coal_call(coal, dealloc); + coalition_unlock(coal); -#if COALITION_DEBUG - printf("%s: coal %llu ref_count-- -> %u%s\n", __func__, coal->id, rc, - do_dealloc ? ", will deallocate now" : ""); -#endif /* COALITION_DEBUG */ + lck_mtx_destroy(&coal->lock, &coalitions_lck_grp); - if (do_dealloc) { - assert(coal->termrequested); - assert(coal->terminated); - assert(coal->active_count == 0); - assert(coal->reaped); - assert(coal->focal_tasks_count == 0); - assert(coal->non_focal_tasks_count == 0); + zfree(coalition_zone, coal); +} - ledger_dereference(coal->ledger); - lck_mtx_destroy(&coal->lock, &coalitions_lck_grp); - zfree(coalition_zone, coal); +/* + * coalition_find_by_id_internal + * Returns: Coalition object with specified id, NOT referenced. + * If not found, returns COALITION_NULL. + * Condition: coalitions_list_lock must be LOCKED. + */ +static coalition_t +coalition_find_by_id_internal(uint64_t coal_id) +{ + if (coal_id == 0) { + return COALITION_NULL; } + + lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED); + coalition_t coal; + qe_foreach_element(coal, &coalitions_q, coalitions) { + if (coal->id == coal_id) { + return coal; + } + } + return COALITION_NULL; } /* @@ -346,8 +982,8 @@ coalition_find_by_id(uint64_t cid) } if (coal->ref_count == 0) { - panic("resurrecting coalition %p id %llu, active_count = %u\n", - coal, coal->id, coal->active_count); + panic("resurrecting coalition %p id:%llu type:%s, active_count:%u\n", + coal, coal->id, coal_type_str(coal->type), coal->active_count); } coal->ref_count++; #if COALITION_DEBUG @@ -357,9 +993,9 @@ coalition_find_by_id(uint64_t cid) coalition_unlock(coal); lck_mtx_unlock(&coalitions_list_lock); -#if COALITION_DEBUG - printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc); -#endif + coal_dbg("id:%llu type:%s ref_count:%u", + coal->id, coal_type_str(coal->type), rc); + return coal; } @@ -397,8 +1033,8 @@ coalition_find_and_activate_by_id(uint64_t cid) } if (coal->ref_count == 0) { - panic("resurrecting coalition %p id %llu, active_count = %u\n", - coal, coal->id, coal->active_count); + panic("resurrecting coalition %p id:%llu type:%s, active_count:%u\n", + coal, coal->id, coal_type_str(coal->type), coal->active_count); } coal->ref_count++; @@ -412,10 +1048,9 @@ coalition_find_and_activate_by_id(uint64_t cid) coalition_unlock(coal); lck_mtx_unlock(&coalitions_list_lock); -#if COALITION_DEBUG - printf("%s: coal %llu ref_count++ -> %u, active_count++ -> %u\n", - __func__, coal->id, rc, ac); -#endif + coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u", + coal->id, coal_type_str(coal->type), rc, ac); + return coal; } @@ -425,63 +1060,159 @@ coalition_id(coalition_t coal) return coal->id; } -uint64_t -task_coalition_id(task_t task) +void +task_coalition_ids(task_t task, uint64_t ids[COALITION_NUM_TYPES]) +{ + int i; + for (i = 0; i < COALITION_NUM_TYPES; i++) { + if (task->coalition[i]) + ids[i] = task->coalition[i]->id; + else + ids[i] = 0; + } +} + +void +task_coalition_roles(task_t task, int roles[COALITION_NUM_TYPES]) +{ + int i; + memset(roles, 0, COALITION_NUM_TYPES * sizeof(roles[0])); + + for (i = 0; i < COALITION_NUM_TYPES; i++) { + if (task->coalition[i]) { + coalition_lock(task->coalition[i]); + roles[i] = coal_call(task->coalition[i], + get_taskrole, task); + coalition_unlock(task->coalition[i]); + } else { + roles[i] = COALITION_TASKROLE_NONE; + } + } +} + + +int +coalition_type(coalition_t coal) +{ + return coal->type; +} + +boolean_t +coalition_term_requested(coalition_t coal) +{ + return coal->termrequested; +} + +boolean_t +coalition_is_terminated(coalition_t coal) +{ + return coal->terminated; +} + +boolean_t +coalition_is_reaped(coalition_t coal) +{ + return coal->reaped; +} + +boolean_t +coalition_is_privileged(coalition_t coal) +{ + return coal->privileged || unrestrict_coalition_syscalls; +} + +boolean_t +task_is_in_privileged_coalition(task_t task, int type) +{ + if (type < 0 || type > COALITION_TYPE_MAX) + return FALSE; + if (unrestrict_coalition_syscalls) + return TRUE; + if (!task->coalition[type]) + return FALSE; + return task->coalition[type]->privileged; +} + +void task_coalition_update_gpu_stats(task_t task, uint64_t gpu_ns_delta) +{ + coalition_t coal; + + assert(task != TASK_NULL); + if (gpu_ns_delta == 0) + return; + + coal = task->coalition[COALITION_TYPE_RESOURCE]; + assert(coal != COALITION_NULL); + + coalition_lock(coal); + coal->r.gpu_time += gpu_ns_delta; + coalition_unlock(coal); +} + +boolean_t task_coalition_adjust_focal_count(task_t task, int count, uint32_t *new_count) +{ + coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING]; + if (coal == COALITION_NULL) + return FALSE; + + *new_count = hw_atomic_add(&coal->focal_task_count, count); + assert(*new_count != UINT32_MAX); + return TRUE; +} + +uint32_t task_coalition_focal_count(task_t task) { - return task->coalition->id; + coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING]; + if (coal == COALITION_NULL) + return 0; + + return coal->focal_task_count; } -boolean_t -coalition_is_privileged(coalition_t coal) +boolean_t task_coalition_adjust_nonfocal_count(task_t task, int count, uint32_t *new_count) { - return coal->privileged || unrestrict_coalition_syscalls; + coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING]; + if (coal == COALITION_NULL) + return FALSE; + + *new_count = hw_atomic_add(&coal->nonfocal_task_count, count); + assert(*new_count != UINT32_MAX); + return TRUE; } -boolean_t -task_is_in_privileged_coalition(task_t task) +uint32_t task_coalition_nonfocal_count(task_t task) { - return task->coalition->privileged || unrestrict_coalition_syscalls; + coalition_t coal = task->coalition[COALITION_FOCAL_TASKS_ACCOUNTING]; + if (coal == COALITION_NULL) + return 0; + + return coal->nonfocal_task_count; } -/* - * coalition_get_ledger - * Returns: Coalition's ledger, NOT referenced. - * Condition: Caller must have a coalition reference. - */ -ledger_t -coalition_get_ledger(coalition_t coal) +void coalition_set_efficient(coalition_t coal) { - return coal->ledger; + coalition_lock(coal); + coal->efficient = TRUE; + coalition_unlock(coal); } -/* - * This is the function to use when you already hold an activation on the - * coalition, and want to extend it to a second activation owned by a new - * object, like when a task in the coalition calls fork(). This is analogous - * to taking a second reference when you already hold one. - * See also coalition_find_and_activate_by_id. - */ -kern_return_t -coalition_extend_active(coalition_t coal) + +void coalition_for_each_task(coalition_t coal, void *ctx, + void (*callback)(coalition_t, void *, task_t)) { - coalition_lock(coal); + assert(coal != COALITION_NULL); - if (coal->reaped) { - panic("cannot make a reaped coalition active again"); - } + coal_dbg("iterating tasks in coalition %p id:%llu type:%s, active_count:%u", + coal, coal->id, coal_type_str(coal->type), coal->active_count); - if (coal->terminated) { - coalition_unlock(coal); - return KERN_TERMINATED; - } + coalition_lock(coal); - assert(coal->active_count > 0); - coal->active_count++; + coal_call(coal, iterate_tasks, ctx, callback); coalition_unlock(coal); - return KERN_SUCCESS; } + void coalition_remove_active(coalition_t coal) { @@ -506,13 +1237,26 @@ coalition_remove_active(coalition_t coal) assert(!coal->notified); coal->notified = TRUE; +#if DEVELOPMENT || DEBUG + do_notify = coal->should_notify; +#else do_notify = TRUE; +#endif notify_id = coal->id; notify_flags = 0; } +#if COALITION_DEBUG + uint64_t cid = coal->id; + uint32_t rc = coal->ref_count; + int ac = coal->active_count; + int ct = coal->type; +#endif coalition_unlock(coal); + coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u,%s", + cid, coal_type_str(ct), rc, ac, do_notify ? " NOTIFY" : " "); + if (do_notify) { coalition_notify_user(notify_id, notify_flags); } @@ -520,25 +1264,39 @@ coalition_remove_active(coalition_t coal) /* Used for kernel_task, launchd, launchd's early boot tasks... */ kern_return_t -coalition_default_adopt_task(task_t task) +coalitions_adopt_init_task(task_t task) { kern_return_t kr; - kr = coalition_adopt_task(default_coalition, task); + kr = coalitions_adopt_task(init_coalition, task); if (kr != KERN_SUCCESS) { panic("failed to adopt task %p into default coalition: %d", task, kr); } return kr; } +/* Used for forked corpses. */ +kern_return_t +coalitions_adopt_corpse_task(task_t task) +{ + kern_return_t kr; + kr = coalitions_adopt_task(corpse_coalition, task); + if (kr != KERN_SUCCESS) { + panic("failed to adopt task %p into corpse coalition: %d", task, kr); + } + return kr; +} + /* - * coalition_adopt_task + * coalition_adopt_task_internal * Condition: Coalition must be referenced and unlocked. Will fail if coalition * is already terminated. */ -kern_return_t -coalition_adopt_task(coalition_t coal, task_t task) +static kern_return_t +coalition_adopt_task_internal(coalition_t coal, task_t task) { - if (task->coalition) { + kern_return_t kr; + + if (task->coalition[coal->type]) { return KERN_ALREADY_IN_SET; } @@ -549,72 +1307,163 @@ coalition_adopt_task(coalition_t coal, task_t task) return KERN_TERMINATED; } + kr = coal_call(coal, adopt_task, task); + if (kr != KERN_SUCCESS) + goto out_unlock; + coal->active_count++; coal->ref_count++; - task->coalition = coal; - - queue_enter(&coal->tasks, task, task_t, coalition_tasks); - coal->task_count++; - - if(coal->task_count < coal->dead_task_count) { - panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id); - } - /* If moving from 0->1 active tasks */ - if (coal->task_count - coal->dead_task_count == 1) { - coal->last_became_nonempty_time = mach_absolute_time(); - } + task->coalition[coal->type] = coal; +out_unlock: #if COALITION_DEBUG + (void)coal; /* need expression after label */ + uint64_t cid = coal->id; uint32_t rc = coal->ref_count; + uint32_t ct = coal->type; #endif + if (get_task_uniqueid(task) != UINT64_MAX) { + /* On 32-bit targets, uniqueid will get truncated to 32 bits */ + KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_ADOPT), + coal->id, get_task_uniqueid(task)); + } coalition_unlock(coal); + coal_dbg("task:%d, id:%llu type:%s ref_count:%u, kr=%d", + task_pid(task), cid, coal_type_str(ct), rc, kr); + return kr; +} + +static kern_return_t +coalition_remove_task_internal(task_t task, int type) +{ + kern_return_t kr; + + coalition_t coal = task->coalition[type]; + + if (!coal) + return KERN_SUCCESS; + + assert(coal->type == (uint32_t)type); + + coalition_lock(coal); + + kr = coal_call(coal, remove_task, task); + #if COALITION_DEBUG - if (rc) { - printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc); - } + uint64_t cid = coal->id; + uint32_t rc = coal->ref_count; + int ac = coal->active_count; + int ct = coal->type; #endif - return KERN_SUCCESS; + KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_COALITION, MACH_COALITION_REMOVE), + coal->id, get_task_uniqueid(task)); + coalition_unlock(coal); + + coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u, kr=%d", + cid, coal_type_str(ct), rc, ac, kr); + + coalition_remove_active(coal); + + return kr; } /* - * coalition_remove_task - * Condition: task must be referenced and UNLOCKED; task's coalition must be UNLOCKED + * coalitions_adopt_task + * Condition: All coalitions must be referenced and unlocked. + * Will fail if any coalition is already terminated. */ kern_return_t -coalition_remove_task(task_t task) +coalitions_adopt_task(coalition_t *coals, task_t task) { - coalition_t coal = task->coalition; - assert(coal); + int i; + kern_return_t kr; - coalition_lock(coal); + if (!coals || coals[COALITION_TYPE_RESOURCE] == COALITION_NULL) + return KERN_INVALID_ARGUMENT; + + /* verify that the incoming coalitions are what they say they are */ + for (i = 0; i < COALITION_NUM_TYPES; i++) + if (coals[i] && coals[i]->type != (uint32_t)i) + return KERN_INVALID_ARGUMENT; + + for (i = 0; i < COALITION_NUM_TYPES; i++) { + kr = KERN_SUCCESS; + if (coals[i]) + kr = coalition_adopt_task_internal(coals[i], task); + if (kr != KERN_SUCCESS) { + /* dis-associate any coalitions that just adopted this task */ + while (--i >= 0) { + if (task->coalition[i]) + coalition_remove_task_internal(task, i); + } + break; + } + } + return kr; +} - queue_remove(&coal->tasks, task, task_t, coalition_tasks); - coal->dead_task_count++; +/* + * coalitions_remove_task + * Condition: task must be referenced and UNLOCKED; all task's coalitions must be UNLOCKED + */ +kern_return_t +coalitions_remove_task(task_t task) +{ + kern_return_t kr; + int i; - if(coal->task_count < coal->dead_task_count) { - panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id); + for (i = 0; i < COALITION_NUM_TYPES; i++) { + kr = coalition_remove_task_internal(task, i); + assert(kr == KERN_SUCCESS); } - /* If moving from 1->0 active tasks */ - if (coal->task_count - coal->dead_task_count == 0) { - uint64_t last_time_nonempty = mach_absolute_time() - coal->last_became_nonempty_time; - coal->last_became_nonempty_time = 0; - coal->time_nonempty += last_time_nonempty; + return kr; +} + +/* + * task_release_coalitions + * helper function to release references to all coalitions in which + * 'task' is a member. + */ +void +task_release_coalitions(task_t task) +{ + int i; + for (i = 0; i < COALITION_NUM_TYPES; i++) { + if (task->coalition[i]) { + coalition_release(task->coalition[i]); + } else if (i == COALITION_TYPE_RESOURCE) { + panic("deallocating task %p was not a member of a resource coalition", task); + } } +} - ledger_rollup(coal->ledger, task->ledger); - coal->bytesread += task->task_io_stats->disk_reads.size; - coal->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; - coal->gpu_time += task_gpu_utilisation(task); +/* + * coalitions_set_roles + * for each type of coalition, if the task is a member of a coalition of + * that type (given in the coalitions parameter) then set the role of + * the task within that that coalition. + */ +kern_return_t coalitions_set_roles(coalition_t coalitions[COALITION_NUM_TYPES], + task_t task, int roles[COALITION_NUM_TYPES]) +{ + kern_return_t kr = KERN_SUCCESS; + int i; - coalition_unlock(coal); + for (i = 0; i < COALITION_NUM_TYPES; i++) { + if (!coalitions[i]) + continue; + coalition_lock(coalitions[i]); + kr = coal_call(coalitions[i], set_taskrole, task, roles[i]); + coalition_unlock(coalitions[i]); + assert(kr == KERN_SUCCESS); + } - coalition_remove_active(coal); - return KERN_SUCCESS; + return kr; } /* @@ -624,7 +1473,9 @@ coalition_remove_task(task_t task) kern_return_t coalition_request_terminate_internal(coalition_t coal) { - if (coal == default_coalition) { + assert(coal->type >= 0 && coal->type <= COALITION_TYPE_MAX); + + if (coal == init_coalition[coal->type]) { return KERN_DEFAULT_SET; } @@ -649,8 +1500,9 @@ coalition_request_terminate_internal(coalition_t coal) if (coal->active_count == 0) { /* * We only notify once, when active_count reaches zero. - * We just decremented, so if it reached zero, we mustn't have - * notified already. + * We just set termrequested to zero. If the active count + * was already at zero (tasks died before we could request + * a termination notification), we should notify. */ assert(!coal->terminated); coal->terminated = TRUE; @@ -658,7 +1510,11 @@ coalition_request_terminate_internal(coalition_t coal) assert(!coal->notified); coal->notified = TRUE; +#if DEVELOPMENT || DEBUG + do_notify = coal->should_notify; +#else do_notify = TRUE; +#endif note_id = coal->id; note_flags = 0; } @@ -679,7 +1535,9 @@ coalition_request_terminate_internal(coalition_t coal) kern_return_t coalition_reap_internal(coalition_t coal) { - if (coal == default_coalition) { + assert(coal->type <= COALITION_TYPE_MAX); + + if (coal == init_coalition[coal->type]) { return KERN_DEFAULT_SET; } @@ -707,7 +1565,7 @@ coalition_reap_internal(coalition_t coal) lck_mtx_lock(&coalitions_list_lock); coalition_count--; - queue_remove(&coalitions, coal, coalition_t, coalitions); + remqueue(&coal->coalitions); lck_mtx_unlock(&coalitions_list_lock); /* Release the list's reference and launchd's reference. */ @@ -717,22 +1575,54 @@ coalition_reap_internal(coalition_t coal) return KERN_SUCCESS; } +#if DEVELOPMENT || DEBUG +int coalition_should_notify(coalition_t coal) +{ + int should; + if (!coal) + return -1; + coalition_lock(coal); + should = coal->should_notify; + coalition_unlock(coal); + + return should; +} + +void coalition_set_notify(coalition_t coal, int notify) +{ + if (!coal) + return; + coalition_lock(coal); + coal->should_notify = !!notify; + coalition_unlock(coal); +} +#endif + void -coalition_init(void) +coalitions_init(void) { + kern_return_t kr; + int i; + const struct coalition_type *ctype; + coalition_zone = zinit( sizeof(struct coalition), CONFIG_COALITION_MAX * sizeof(struct coalition), COALITION_CHUNK * sizeof(struct coalition), "coalitions"); zone_change(coalition_zone, Z_NOENCRYPT, TRUE); - queue_init(&coalitions); + queue_head_init(coalitions_q); if (!PE_parse_boot_argn("unrestrict_coalition_syscalls", &unrestrict_coalition_syscalls, sizeof (unrestrict_coalition_syscalls))) { unrestrict_coalition_syscalls = 0; } + if (!PE_parse_boot_argn("tg_adaptive", &merge_adaptive_coalitions, + sizeof (merge_adaptive_coalitions))) { + merge_adaptive_coalitions = 0; + } + lck_grp_attr_setdefault(&coalitions_lck_grp_attr); lck_grp_init(&coalitions_lck_grp, "coalition", &coalitions_lck_grp_attr); lck_attr_setdefault(&coalitions_lck_attr); @@ -740,54 +1630,453 @@ coalition_init(void) init_task_ledgers(); - kern_return_t kr = coalition_create_internal(&default_coalition, TRUE); - if (kr != KERN_SUCCESS) { - panic("%s: could not create default coalition: %d", __func__, kr); + for (i = 0, ctype = &s_coalition_types[0]; i < COALITION_NUM_TYPES; ctype++, i++) { + /* verify the entry in the global coalition types array */ + if (ctype->type != i || + !ctype->init || + !ctype->dealloc || + !ctype->adopt_task || + !ctype->remove_task) { + panic("%s: Malformed coalition type %s(%d) in slot for type:%s(%d)", + __func__, coal_type_str(ctype->type), ctype->type, coal_type_str(i), i); + } + if (!ctype->has_default) + continue; + kr = coalition_create_internal(ctype->type, COALITION_ROLE_SYSTEM, TRUE, &init_coalition[ctype->type]); + if (kr != KERN_SUCCESS) + panic("%s: could not create init %s coalition: kr:%d", + __func__, coal_type_str(i), kr); + kr = coalition_create_internal(ctype->type, COALITION_ROLE_SYSTEM, FALSE, &corpse_coalition[ctype->type]); + if (kr != KERN_SUCCESS) + panic("%s: could not create corpse %s coalition: kr:%d", + __func__, coal_type_str(i), kr); } + /* "Leak" our reference to the global object */ } -/* coalition focal tasks */ -uint32_t coalition_adjust_focal_task_count(coalition_t coal, int count) +/* + * BSD Kernel interface functions + * + */ +static void coalition_fill_procinfo(struct coalition *coal, + struct procinfo_coalinfo *coalinfo) +{ + coalinfo->coalition_id = coal->id; + coalinfo->coalition_type = coal->type; + coalinfo->coalition_tasks = coalition_get_task_count(coal); +} + + +int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz) { - return hw_atomic_add(&coal->focal_tasks_count, count); + int ncoals = 0; + struct coalition *coal; + + lck_mtx_lock(&coalitions_list_lock); + qe_foreach_element(coal, &coalitions_q, coalitions) { + if (!coal->reaped && (type < 0 || type == (int)coal->type)) { + if (coal_list && ncoals < list_sz) + coalition_fill_procinfo(coal, &coal_list[ncoals]); + ++ncoals; + } + } + lck_mtx_unlock(&coalitions_list_lock); + + return ncoals; } -uint32_t coalition_focal_task_count(coalition_t coal) +/* + * Jetsam coalition interface + * + */ +boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal) { - return coal->focal_tasks_count; + coalition_t c; + boolean_t ret; + + if (coal) /* handle the error cases gracefully */ + *coal = COALITION_NULL; + + if (!task) + return FALSE; + + if (coal_type > COALITION_TYPE_MAX) + return FALSE; + + c = task->coalition[coal_type]; + if (!c) + return FALSE; + + assert((int)c->type == coal_type); + + coalition_lock(c); + + if (coal) + *coal = c; + + ret = FALSE; + if (c->type == COALITION_TYPE_JETSAM && c->j.leader == task) + ret = TRUE; + + coalition_unlock(c); + + return ret; } -uint32_t coalition_adjust_non_focal_task_count(coalition_t coal, int count) +kern_return_t coalition_iterate_stackshot(coalition_iterate_fn_t callout, void *arg, uint32_t coalition_type) { - return hw_atomic_add(&coal->non_focal_tasks_count, count); + coalition_t coal; + int i = 0; + + qe_foreach_element(coal, &coalitions_q, coalitions) { + if (coal == NULL || !ml_validate_nofault((vm_offset_t)coal, sizeof(struct coalition))) + return KERN_FAILURE; + + if (coalition_type == coal->type) + callout(arg, i++, coal); + } + + return KERN_SUCCESS; } -uint32_t coalition_non_focal_task_count(coalition_t coal) +task_t kdp_coalition_get_leader(coalition_t coal) { - return coal->non_focal_tasks_count; + if (!coal) + return TASK_NULL; + + if (coal->type == COALITION_TYPE_JETSAM) { + return coal->j.leader; + } + return TASK_NULL; } -/* Call sfi_reevaluate() for every thread in the coalition */ -void coalition_sfi_reevaluate(coalition_t coal, task_t updated_task) { - task_t task; - thread_t thread; +task_t coalition_get_leader(coalition_t coal) +{ + task_t leader = TASK_NULL; + + if (!coal) + return TASK_NULL; coalition_lock(coal); + if (coal->type != COALITION_TYPE_JETSAM) + goto out_unlock; - queue_iterate(&coal->tasks, task, task_t, coalition_tasks) { + leader = coal->j.leader; + if (leader != TASK_NULL) + task_reference(leader); - /* Skip the task we're doing this on behalf of - it's already updated */ - if (task == updated_task) - continue; +out_unlock: + coalition_unlock(coal); + return leader; +} + + +int coalition_get_task_count(coalition_t coal) +{ + int ntasks = 0; + struct queue_entry *qe; + if (!coal) + return 0; + + coalition_lock(coal); + switch (coal->type) { + case COALITION_TYPE_RESOURCE: + qe_foreach(qe, &coal->r.tasks) + ntasks++; + break; + case COALITION_TYPE_JETSAM: + if (coal->j.leader) + ntasks++; + qe_foreach(qe, &coal->j.other) + ntasks++; + qe_foreach(qe, &coal->j.extensions) + ntasks++; + qe_foreach(qe, &coal->j.services) + ntasks++; + break; + default: + break; + } + coalition_unlock(coal); + + return ntasks; +} + + +static uint64_t i_get_list_footprint(queue_t list, int type, int *ntasks) +{ + task_t task; + uint64_t bytes = 0; + + qe_foreach_element(task, list, task_coalition[type]) { + bytes += get_task_phys_footprint(task); + coal_dbg(" [%d] task_pid:%d, type:%d, footprint:%lld", + *ntasks, task_pid(task), type, bytes); + *ntasks += 1; + } + + return bytes; +} + +uint64_t coalition_get_page_count(coalition_t coal, int *ntasks) +{ + uint64_t bytes = 0; + int num_tasks = 0; - task_lock(task); + if (ntasks) + *ntasks = 0; + if (!coal) + return bytes; - queue_iterate(&task->threads, thread, thread_t, task_threads) { - sfi_reevaluate(thread); + coalition_lock(coal); + + switch (coal->type) { + case COALITION_TYPE_RESOURCE: + bytes += i_get_list_footprint(&coal->r.tasks, COALITION_TYPE_RESOURCE, &num_tasks); + break; + case COALITION_TYPE_JETSAM: + if (coal->j.leader) { + bytes += get_task_phys_footprint(coal->j.leader); + num_tasks = 1; } - task_unlock(task); + bytes += i_get_list_footprint(&coal->j.extensions, COALITION_TYPE_JETSAM, &num_tasks); + bytes += i_get_list_footprint(&coal->j.services, COALITION_TYPE_JETSAM, &num_tasks); + bytes += i_get_list_footprint(&coal->j.other, COALITION_TYPE_JETSAM, &num_tasks); + break; + default: + break; } + coalition_unlock(coal); + + if (ntasks) + *ntasks = num_tasks; + + return bytes / PAGE_SIZE_64; +} + +struct coal_sort_s { + int pid; + int usr_order; + uint64_t bytes; +}; + +/* + * return < 0 for a < b + * 0 for a == b + * > 0 for a > b + */ +typedef int (*cmpfunc_t)(const void *a, const void *b); + +extern void +qsort(void *a, size_t n, size_t es, cmpfunc_t cmp); + +static int dflt_cmp(const void *a, const void *b) +{ + const struct coal_sort_s *csA = (const struct coal_sort_s *)a; + const struct coal_sort_s *csB = (const struct coal_sort_s *)b; + + /* + * if both A and B are equal, use a memory descending sort + */ + if (csA->usr_order == csB->usr_order) + return (int)((int64_t)csB->bytes - (int64_t)csA->bytes); + + /* otherwise, return the relationship between user specified orders */ + return (csA->usr_order - csB->usr_order); +} + +static int mem_asc_cmp(const void *a, const void *b) +{ + const struct coal_sort_s *csA = (const struct coal_sort_s *)a; + const struct coal_sort_s *csB = (const struct coal_sort_s *)b; + + return (int)((int64_t)csA->bytes - (int64_t)csB->bytes); +} + +static int mem_dec_cmp(const void *a, const void *b) +{ + const struct coal_sort_s *csA = (const struct coal_sort_s *)a; + const struct coal_sort_s *csB = (const struct coal_sort_s *)b; + + return (int)((int64_t)csB->bytes - (int64_t)csA->bytes); +} + +static int usr_asc_cmp(const void *a, const void *b) +{ + const struct coal_sort_s *csA = (const struct coal_sort_s *)a; + const struct coal_sort_s *csB = (const struct coal_sort_s *)b; + + return (csA->usr_order - csB->usr_order); +} + +static int usr_dec_cmp(const void *a, const void *b) +{ + const struct coal_sort_s *csA = (const struct coal_sort_s *)a; + const struct coal_sort_s *csB = (const struct coal_sort_s *)b; + + return (csB->usr_order - csA->usr_order); +} + +/* avoid dynamic allocation in this path */ +#define MAX_SORTED_PIDS 80 + +static int coalition_get_sort_list(coalition_t coal, int sort_order, queue_t list, + struct coal_sort_s *sort_array, int array_sz) +{ + int ntasks = 0; + task_t task; + + assert(sort_array != NULL); + + if (array_sz <= 0) + return 0; + + if (!list) { + /* + * this function will only be called with a NULL + * list for JETSAM-type coalitions, and is intended + * to investigate the leader process + */ + if (coal->type != COALITION_TYPE_JETSAM || + coal->j.leader == TASK_NULL) + return 0; + sort_array[0].pid = task_pid(coal->j.leader); + switch (sort_order) { + case COALITION_SORT_DEFAULT: + sort_array[0].usr_order = 0; + /* fall-through */ + case COALITION_SORT_MEM_ASC: + case COALITION_SORT_MEM_DEC: + sort_array[0].bytes = get_task_phys_footprint(coal->j.leader); + break; + case COALITION_SORT_USER_ASC: + case COALITION_SORT_USER_DEC: + sort_array[0].usr_order = 0; + break; + default: + break; + } + return 1; + } + + qe_foreach_element(task, list, task_coalition[coal->type]) { + if (ntasks >= array_sz) { + printf("WARNING: more than %d pids in coalition %llu\n", + MAX_SORTED_PIDS, coal->id); + break; + } + + sort_array[ntasks].pid = task_pid(task); + + switch (sort_order) { + case COALITION_SORT_DEFAULT: + sort_array[ntasks].usr_order = 0; + /* fall-through */ + case COALITION_SORT_MEM_ASC: + case COALITION_SORT_MEM_DEC: + sort_array[ntasks].bytes = get_task_phys_footprint(task); + break; + case COALITION_SORT_USER_ASC: + case COALITION_SORT_USER_DEC: + sort_array[ntasks].usr_order = 0; + break; + default: + break; + } + + ntasks++; + } + + return ntasks; } +int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order, + int *pid_list, int list_sz) +{ + struct i_jetsam_coalition *cj; + int ntasks = 0; + cmpfunc_t cmp_func = NULL; + struct coal_sort_s sort_array[MAX_SORTED_PIDS] = { {0,0,0} }; /* keep to < 2k */ + + if (!coal || + !(rolemask & COALITION_ROLEMASK_ALLROLES) || + !pid_list || list_sz < 1) { + coal_dbg("Invalid parameters: coal:%p, type:%d, rolemask:0x%x, " + "pid_list:%p, list_sz:%d", coal, coal ? coal->type : -1, + rolemask, pid_list, list_sz); + return -EINVAL; + } + + switch (sort_order) { + case COALITION_SORT_NOSORT: + cmp_func = NULL; + break; + case COALITION_SORT_DEFAULT: + cmp_func = dflt_cmp; + break; + case COALITION_SORT_MEM_ASC: + cmp_func = mem_asc_cmp; + break; + case COALITION_SORT_MEM_DEC: + cmp_func = mem_dec_cmp; + break; + case COALITION_SORT_USER_ASC: + cmp_func = usr_asc_cmp; + break; + case COALITION_SORT_USER_DEC: + cmp_func = usr_dec_cmp; + break; + default: + return -ENOTSUP; + } + + coalition_lock(coal); + + if (coal->type == COALITION_TYPE_RESOURCE) { + ntasks += coalition_get_sort_list(coal, sort_order, &coal->r.tasks, + sort_array, MAX_SORTED_PIDS); + goto unlock_coal; + } + + cj = &coal->j; + + if (rolemask & COALITION_ROLEMASK_UNDEF) + ntasks += coalition_get_sort_list(coal, sort_order, &cj->other, + sort_array + ntasks, + MAX_SORTED_PIDS - ntasks); + + if (rolemask & COALITION_ROLEMASK_XPC) + ntasks += coalition_get_sort_list(coal, sort_order, &cj->services, + sort_array + ntasks, + MAX_SORTED_PIDS - ntasks); + + if (rolemask & COALITION_ROLEMASK_EXT) + ntasks += coalition_get_sort_list(coal, sort_order, &cj->extensions, + sort_array + ntasks, + MAX_SORTED_PIDS - ntasks); + + if (rolemask & COALITION_ROLEMASK_LEADER) + ntasks += coalition_get_sort_list(coal, sort_order, NULL, + sort_array + ntasks, + MAX_SORTED_PIDS - ntasks); + +unlock_coal: + coalition_unlock(coal); + + /* sort based on the chosen criterion (no sense sorting 1 item) */ + if (cmp_func && ntasks > 1) + qsort(sort_array, ntasks, sizeof(struct coal_sort_s), cmp_func); + + for (int i = 0; i < ntasks; i++) { + if (i >= list_sz) + break; + coal_dbg(" [%d] PID:%d, footprint:%lld, usr_order:%d", + i, sort_array[i].pid, sort_array[i].bytes, + sort_array[i].usr_order); + pid_list[i] = sort_array[i].pid; + } + + return ntasks; +}