]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/coalition.c
xnu-2782.1.97.tar.gz
[apple/xnu.git] / osfmk / kern / coalition.c
diff --git a/osfmk/kern/coalition.c b/osfmk/kern/coalition.c
new file mode 100644 (file)
index 0000000..df8126b
--- /dev/null
@@ -0,0 +1,743 @@
+/*
+ * Copyright (c) 2013 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/kern_types.h>
+#include <mach/mach_types.h>
+#include <mach/boolean.h>
+
+#include <kern/coalition.h>
+#include <kern/host.h>
+#include <kern/ledger.h>
+#include <kern/kalloc.h>
+#include <kern/mach_param.h> /* for TASK_CHUNK */
+#include <kern/task.h>
+#include <kern/zalloc.h>
+
+#include <libkern/OSAtomic.h>
+
+#include <mach/coalition_notification_server.h>
+#include <mach/host_priv.h>
+#include <mach/host_special_ports.h>
+
+#include <sys/errno.h>
+
+/* defined in task.c */
+extern ledger_template_t task_ledger_template;
+
+/*
+ * Coalition zone needs limits. We expect there will be as many coalitions as
+ * tasks (same order of magnitude), so use the task zone's limits.
+ */
+#define CONFIG_COALITION_MAX CONFIG_TASK_MAX
+#define COALITION_CHUNK TASK_CHUNK
+
+int unrestrict_coalition_syscalls;      /* boot-arg override: non-zero makes every privilege check in this file succeed */
+
+lck_attr_t coalitions_lck_attr;         /* attributes used for the list lock and for every per-coalition lock */
+lck_grp_t coalitions_lck_grp;           /* lock group ("coalition") for those same locks */
+lck_grp_attr_t coalitions_lck_grp_attr;
+
+/* coalitions_list_lock protects coalition_count, the coalitions queue, and coalition_next_id. */
+decl_lck_mtx_data(static,coalitions_list_lock);
+static uint64_t coalition_count;        /* number of coalitions currently linked on the coalitions queue */
+static uint64_t coalition_next_id = 1;  /* next id to hand out; starts at 1, and lookups treat id 0 as invalid */
+static queue_head_t coalitions;         /* every coalition that has been created and not yet reaped */
+
+coalition_t default_coalition;          /* created by coalition_init(); terminate and reap requests on it are refused */
+
+zone_t coalition_zone;                  /* zone that struct coalition is allocated from and freed to */
+
+struct coalition {
+       uint64_t id;                    /* unique and never 0; taken from coalition_next_id under coalitions_list_lock */
+
+       ledger_t ledger;                /* ledger totals rolled up from tasks that have left the coalition */
+       uint64_t bytesread;             /* disk bytes read by tasks that have left */
+       uint64_t byteswritten;          /* total I/O minus disk reads, for tasks that have left */
+       uint64_t gpu_time;              /* GPU utilisation accumulated from tasks that have left */
+
+       /*
+        * Count the length of time this coalition had at least one active task.
+        * This can be a 'denominator' to turn e.g. cpu_time to %cpu.
+        */
+       uint64_t last_became_nonempty_time;     /* mach_absolute_time() at the latest 0 -> 1 live-task transition; 0 while empty */
+       uint64_t time_nonempty;                 /* sum of completed non-empty intervals, in mach_absolute_time() units */
+
+       uint64_t task_count;            /* Count of tasks that have started in this coalition */
+       uint64_t dead_task_count;       /* Count of tasks that have exited in this coalition; subtract from task_count to get count of "active" */
+       queue_head_t tasks;             /* List of active tasks in the coalition, linked through each task's coalition_tasks field */
+
+       queue_chain_t coalitions;       /* linkage on the global list of coalitions; changed only under coalitions_list_lock */
+
+       decl_lck_mtx_data(,lock)        /* Coalition lock; held while the counters, task list and state bits here are changed. */
+
+       uint32_t ref_count;             /* Number of references to the memory containing this struct; freed when it reaches 0 */
+       uint32_t active_count;          /* Number of members of (tasks in) the coalition, plus vouchers referring to the coalition */
+
+       unsigned int privileged : 1;    /* Members of this coalition may create and manage coalitions and may posix_spawn processes into selected coalitions */
+
+       /* ast? */
+
+       /* voucher */
+
+       /* state of the coalition; each bit is set once in this file and never cleared */
+       unsigned int termrequested : 1;         /* launchd has requested termination when coalition becomes empty */
+       unsigned int terminated : 1;            /* coalition became empty and spawns are now forbidden */
+       unsigned int reaped : 1;                /* reaped, invisible to userspace, but waiting for ref_count to go to zero */
+       unsigned int notified : 1;              /* no-more-processes notification was sent via special port */
+};
+
+#define coalition_lock(c) do{ lck_mtx_lock(&(c)->lock); }while(0)     /* take c's mutex; (c) is parenthesized so any pointer expression expands correctly */
+#define coalition_unlock(c) do{ lck_mtx_unlock(&(c)->lock); }while(0) /* drop c's mutex; same parenthesization */
+
+/*
+ * coalition_notify_user
+ * Sends (id, flags) to the host's coalition port; does nothing if no valid port can be obtained.
+ */
+static void
+coalition_notify_user(uint64_t id, uint32_t flags)
+{
+       mach_port_t notify_port;
+       kern_return_t lookup_kr = host_get_coalition_port(host_priv_self(), &notify_port);
+       if ((lookup_kr == KERN_SUCCESS) && IPC_PORT_VALID(notify_port)) {
+               coalition_notification(notify_port, id, flags);
+       }
+}
+
+/*
+ * coalition_find_by_id_internal
+ * Returns: The coalition whose id equals coal_id, with NO reference taken, or
+ *          COALITION_NULL when coal_id is 0 or nothing on the list matches.
+ * Condition: coalitions_list_lock must be LOCKED.
+ */
+static coalition_t
+coalition_find_by_id_internal(uint64_t coal_id)
+{
+       coalition_t candidate;
+       if (coal_id == 0) {
+               return COALITION_NULL;
+       }
+       lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED);
+       queue_iterate(&coalitions, candidate, coalition_t, coalitions) {
+               if (candidate->id != coal_id) {
+                       continue;
+               }
+               return candidate;
+       }
+       return COALITION_NULL;
+}
+
+/*
+ * coalition_resource_usage_internal
+ * Fills *cru_out with the coalition's cumulative resource usage: the totals
+ * rolled up from tasks that already left, plus the current usage of every
+ * task still on the coalition's task list.
+ * Returns: KERN_SUCCESS, or KERN_RESOURCE_SHORTAGE if the scratch ledger
+ *          cannot be allocated (nothing is written to cru_out in that case).
+ * Condition: Coalition must be UNLOCKED; the coalition lock is taken here.
+ */
+kern_return_t
+coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_usage *cru_out)
+{
+       kern_return_t kr;
+       ledger_amount_t credit, debit;
+       uint64_t total_read, total_written, total_gpu;
+       uint64_t nonempty_abs, nonempty_since;
+       task_t member;
+
+       /* Scratch ledger that receives the sum of all the individual ledgers. */
+       ledger_t totals = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
+       if (totals == LEDGER_NULL) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       coalition_lock(coal);
+
+       /* Seed the sums with what departed tasks left behind in the coalition... */
+       ledger_rollup(totals, coal->ledger);
+       total_read = coal->bytesread;
+       total_written = coal->byteswritten;
+       total_gpu = coal->gpu_time;
+
+       /*
+        * ...then add every live member. Tasks cannot deallocate out from
+        * under us, since the coalition lock is held across the walk.
+        */
+       queue_iterate(&coal->tasks, member, task_t, coalition_tasks) {
+               ledger_rollup(totals, member->ledger);
+               total_read += member->task_io_stats->disk_reads.size;
+               total_written += member->task_io_stats->total_io.size - member->task_io_stats->disk_reads.size;
+               total_gpu += task_gpu_utilisation(member);
+       }
+
+       /* Snapshot the coalition's own bookkeeping before dropping the lock. */
+       cru_out->tasks_started = coal->task_count;
+       cru_out->tasks_exited = coal->dead_task_count;
+       nonempty_abs = coal->time_nonempty;
+       nonempty_since = coal->last_became_nonempty_time;
+
+       coalition_unlock(coal);
+
+       /* Report the credit side of each summed entry; an unreadable entry counts as 0. */
+       kr = ledger_get_entries(totals, task_ledgers.cpu_time, &credit, &debit);
+       cru_out->cpu_time = (kr == KERN_SUCCESS) ? credit : 0;
+
+       kr = ledger_get_entries(totals, task_ledgers.interrupt_wakeups, &credit, &debit);
+       cru_out->interrupt_wakeups = (kr == KERN_SUCCESS) ? credit : 0;
+
+       kr = ledger_get_entries(totals, task_ledgers.platform_idle_wakeups, &credit, &debit);
+       cru_out->platform_idle_wakeups = (kr == KERN_SUCCESS) ? credit : 0;
+
+       cru_out->bytesread = total_read;
+       cru_out->byteswritten = total_written;
+       cru_out->gpu_time = total_gpu;
+
+       ledger_dereference(totals);
+       totals = LEDGER_NULL;
+
+       /*
+        * A non-zero start time means the coalition is non-empty right now, so
+        * the interval still in progress has to be counted as well.
+        */
+       if (nonempty_since != 0) {
+               nonempty_abs += mach_absolute_time() - nonempty_since;
+       }
+       absolutetime_to_nanoseconds(nonempty_abs, &cru_out->time_nonempty);
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * coalition_create_internal
+ * Returns: KERN_SUCCESS with *out set to a new, unlocked coalition carrying one
+ *          reference for the caller; KERN_RESOURCE_SHORTAGE (and *out untouched)
+ *          if the zone or ledger allocation fails.
+ * Condition: coalitions_list_lock must be UNLOCKED.
+ */
+kern_return_t
+coalition_create_internal(coalition_t *out, boolean_t privileged)
+{
+       struct coalition *coal = (struct coalition *)zalloc(coalition_zone);
+       if (coal == COALITION_NULL) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+       bzero(coal, sizeof(*coal));
+
+       coal->ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
+       if (coal->ledger == NULL) {
+               zfree(coalition_zone, coal);
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       /* Two references from the start: the caller's and the global list's. */
+       coal->ref_count = 2;
+       coal->privileged = (privileged != FALSE);
+       queue_init(&coal->tasks);
+       lck_mtx_init(&coal->lock, &coalitions_lck_grp, &coalitions_lck_attr);
+
+       /* The id is allocated and the coalition published under one hold of the list lock. */
+       lck_mtx_lock(&coalitions_list_lock);
+       coal->id = coalition_next_id++;
+       coalition_count++;
+       queue_enter(&coalitions, coal, coalition_t, coalitions);
+       lck_mtx_unlock(&coalitions_list_lock);
+
+#if COALITION_DEBUG
+       printf("%s: new coal id %llu\n", __func__, coal->id);
+#endif
+       *out = coal;
+       return KERN_SUCCESS;
+}
+
+/*
+ * coalition_release: drops one reference; the final release frees the coalition.
+ * Panics on over-release. Condition: coalition must be UNLOCKED.
+ */
+void
+coalition_release(coalition_t coal)
+{
+       boolean_t do_dealloc;
+
+       /* TODO: This can be done with atomics. */
+       coalition_lock(coal);
+       if (coal->ref_count == 0) {
+               panic("%s: over-release of coalition %p id %llu\n", __func__, coal, coal->id);
+       }
+       do_dealloc = (--coal->ref_count == 0) ? TRUE : FALSE;
+#if COALITION_DEBUG
+       /* Snapshot under the lock: once our reference is dropped another thread may free coal. */
+       uint64_t cid = coal->id;
+       uint32_t rc = coal->ref_count;
+#endif /* COALITION_DEBUG */
+       coalition_unlock(coal);
+
+#if COALITION_DEBUG
+       printf("%s: coal %llu ref_count-- -> %u%s\n", __func__, cid, rc,
+                       do_dealloc ? ", will deallocate now" : "");
+#endif /* COALITION_DEBUG */
+       if (do_dealloc) {
+               /* That was the last reference, so coal is still safe to inspect and tear down here. */
+               assert(coal->termrequested);
+               assert(coal->terminated);
+               assert(coal->active_count == 0);
+               assert(coal->reaped);
+               ledger_dereference(coal->ledger);
+               lck_mtx_destroy(&coal->lock, &coalitions_lck_grp);
+               zfree(coalition_zone, coal);
+       }
+}
+
+/*
+ * coalition_find_by_id
+ * Returns: Coalition object with the given id plus a new reference for the
+ *          caller, or COALITION_NULL if cid is 0, unknown, or already reaped.
+ * Condition: coalitions_list_lock must be UNLOCKED.
+ */
+coalition_t
+coalition_find_by_id(uint64_t cid)
+{
+       coalition_t found;
+
+       if (cid == 0) {
+               return COALITION_NULL;
+       }
+       lck_mtx_lock(&coalitions_list_lock);
+
+       found = coalition_find_by_id_internal(cid);
+       if (found == COALITION_NULL) {
+               lck_mtx_unlock(&coalitions_list_lock);
+               return COALITION_NULL;
+       }
+
+       /* The list lock is still held while the coalition's own lock is taken and its state examined. */
+       coalition_lock(found);
+       if (found->reaped) {
+               coalition_unlock(found);
+               lck_mtx_unlock(&coalitions_list_lock);
+               return COALITION_NULL;
+       }
+
+       if (found->ref_count == 0) {
+               panic("resurrecting coalition %p id %llu, active_count = %u\n",
+                               found, found->id, found->active_count);
+       }
+       found->ref_count++;
+#if COALITION_DEBUG
+       uint32_t new_refs = found->ref_count;
+#endif
+       coalition_unlock(found);
+       lck_mtx_unlock(&coalitions_list_lock);
+#if COALITION_DEBUG
+       printf("%s: coal %llu ref_count++ -> %u\n", __func__, found->id, new_refs);
+#endif
+       return found;
+}
+
+/*
+ * coalition_find_and_activate_by_id
+ * Returns: Coalition object with the given id, with a new reference AND a new
+ *          activation, or COALITION_NULL if cid is 0, unknown, reaped or terminated.
+ * Condition: coalitions_list_lock must be UNLOCKED.
+ * This is the function to use when putting a 'new' thing into a coalition,
+ * like posix_spawn of an XPC service by launchd.
+ * See also coalition_extend_active.
+ */
+coalition_t
+coalition_find_and_activate_by_id(uint64_t cid)
+{
+       coalition_t found;
+
+       if (cid == 0) {
+               return COALITION_NULL;
+       }
+       lck_mtx_lock(&coalitions_list_lock);
+
+       found = coalition_find_by_id_internal(cid);
+       if (found == COALITION_NULL) {
+               lck_mtx_unlock(&coalitions_list_lock);
+               return COALITION_NULL;
+       }
+
+       coalition_lock(found);
+       if (found->terminated || found->reaped) {
+               /* Nothing new may join a coalition that is already on its way out. */
+               coalition_unlock(found);
+               lck_mtx_unlock(&coalitions_list_lock);
+               return COALITION_NULL;
+       }
+
+       if (found->ref_count == 0) {
+               panic("resurrecting coalition %p id %llu, active_count = %u\n",
+                               found, found->id, found->active_count);
+       }
+
+       /* The caller receives both a reference and an activation. */
+       found->ref_count++;
+       found->active_count++;
+#if COALITION_DEBUG
+       uint32_t new_refs = found->ref_count;
+       uint32_t new_active = found->active_count;
+#endif
+
+       coalition_unlock(found);
+       lck_mtx_unlock(&coalitions_list_lock);
+
+#if COALITION_DEBUG
+       printf("%s: coal %llu ref_count++ -> %u, active_count++ -> %u\n",
+                       __func__, found->id, new_refs, new_active);
+#endif
+       return found;
+}
+
+uint64_t
+coalition_id(coalition_t coal)
+{
+       return coal->id;        /* assigned once in coalition_create_internal(); read here without taking the coalition lock */
+}
+
+uint64_t
+task_coalition_id(task_t task)
+{
+       return task->coalition->id;     /* no NULL check: task->coalition must already be set (see coalition_adopt_task) */
+}
+
+boolean_t
+coalition_is_privileged(coalition_t coal)
+{
+       return coal->privileged || unrestrict_coalition_syscalls;       /* a non-zero boot-arg override makes every coalition count as privileged */
+}
+
+boolean_t
+task_is_in_privileged_coalition(task_t task)
+{
+       return task->coalition->privileged || unrestrict_coalition_syscalls;    /* no NULL check on task->coalition; a non-zero boot-arg override forces TRUE */
+}
+
+/*
+ * coalition_get_ledger
+ * Returns: Coalition's ledger pointer exactly as stored in the struct; NOT referenced for the caller.
+ * Condition: Caller must have a coalition reference.
+ */
+ledger_t
+coalition_get_ledger(coalition_t coal)
+{
+       return coal->ledger;
+}
+
+/*
+ * coalition_extend_active
+ * For a caller that already holds an activation and wants a second one for a
+ * new object (e.g. a member task calling fork()). Panics on a reaped coalition.
+ * Returns: KERN_SUCCESS, or KERN_TERMINATED once the coalition has terminated.
+ */
+kern_return_t
+coalition_extend_active(coalition_t coal)
+{
+       kern_return_t result = KERN_SUCCESS;
+
+       coalition_lock(coal);
+
+       if (coal->reaped) {
+               panic("cannot make a reaped coalition active again");
+       }
+
+       if (!coal->terminated) {
+               assert(coal->active_count > 0);
+               coal->active_count++;
+       } else {
+               result = KERN_TERMINATED;
+       }
+
+       coalition_unlock(coal);
+       return result;
+}
+
+/*
+ * coalition_remove_active
+ * Drops one activation. If termination was requested and this was the last
+ * activation, marks the coalition terminated and notifies userspace.
+ * Condition: Coalition must be UNLOCKED.
+ */
+void
+coalition_remove_active(coalition_t coal)
+{
+       boolean_t send_note = FALSE;
+       uint64_t note_id = 0;
+       uint32_t note_flags = 0;
+
+       coalition_lock(coal);
+
+       assert(!coal->reaped);
+       assert(coal->active_count > 0);
+       coal->active_count--;
+
+       if (coal->active_count == 0 && coal->termrequested) {
+               /* We only notify once: the count was just decremented to zero, so nothing can have been sent yet. */
+               assert(!coal->terminated);
+               assert(!coal->notified);
+               coal->terminated = TRUE;
+               coal->notified = TRUE;
+               send_note = TRUE;
+               note_id = coal->id;
+       }
+
+       coalition_unlock(coal);
+
+       if (send_note) {
+               coalition_notify_user(note_id, note_flags);
+       }
+}
+
+/* Adopts task into default_coalition (kernel_task, launchd, launchd's early boot tasks...); panics on failure. */
+kern_return_t
+coalition_default_adopt_task(task_t task)
+{
+       kern_return_t adopt_kr = coalition_adopt_task(default_coalition, task);
+
+       if (adopt_kr != KERN_SUCCESS) {
+               panic("failed to adopt task %p into default coalition: %d", task, adopt_kr);
+       }
+       return adopt_kr;
+}
+
+/*
+ * coalition_adopt_task
+ * Makes task a member of coal, taking one reference and one activation for it.
+ * Returns: KERN_SUCCESS; KERN_ALREADY_IN_SET if task already has a coalition;
+ *          KERN_TERMINATED if coal has been terminated or reaped.
+ * Condition: Coalition must be referenced and unlocked.
+ */
+kern_return_t
+coalition_adopt_task(coalition_t coal, task_t task)
+{
+       if (task->coalition) {
+               return KERN_ALREADY_IN_SET;
+       }
+
+       coalition_lock(coal);
+
+       if (coal->terminated || coal->reaped) {
+               coalition_unlock(coal);
+               return KERN_TERMINATED;
+       }
+
+       /* Membership costs one activation and one reference, then the task is linked in. */
+       coal->active_count++;
+       coal->ref_count++;
+       task->coalition = coal;
+       queue_enter(&coal->tasks, task, task_t, coalition_tasks);
+       coal->task_count++;
+
+       if (coal->dead_task_count > coal->task_count) {
+               panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id);
+       }
+
+       /* First live task: start timing a new non-empty interval. */
+       if (coal->task_count == coal->dead_task_count + 1) {
+               coal->last_became_nonempty_time = mach_absolute_time();
+       }
+
+#if COALITION_DEBUG
+       uint32_t new_refs = coal->ref_count;
+#endif
+       coalition_unlock(coal);
+
+#if COALITION_DEBUG
+       if (new_refs) {
+               printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, new_refs);
+       }
+#endif
+       return KERN_SUCCESS;
+}
+
+/*
+ * coalition_remove_task: unlink task from its coalition, fold its usage into the totals, drop its activation.
+ * Condition: task must be referenced and UNLOCKED; task's coalition must be UNLOCKED
+ */
+kern_return_t
+coalition_remove_task(task_t task)
+{
+       coalition_t owner = task->coalition;
+       assert(owner);
+
+       coalition_lock(owner);
+
+       queue_remove(&owner->tasks, task, task_t, coalition_tasks);
+       owner->dead_task_count++;
+
+       if (owner->dead_task_count > owner->task_count) {
+               panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, owner, owner->id);
+       }
+
+       /* Last live task gone: close the current non-empty interval and bank it. */
+       if (owner->task_count == owner->dead_task_count) {
+               owner->time_nonempty += mach_absolute_time() - owner->last_became_nonempty_time;
+               owner->last_became_nonempty_time = 0;
+       }
+
+       /* Keep the departing task's usage so later queries still include it. */
+       ledger_rollup(owner->ledger, task->ledger);
+       owner->bytesread += task->task_io_stats->disk_reads.size;
+       owner->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
+       owner->gpu_time += task_gpu_utilisation(task);
+
+       coalition_unlock(owner);
+       /* Only the activation is dropped here; task->coalition and its reference are left in place. */
+       coalition_remove_active(owner);
+       return KERN_SUCCESS;
+}
+
+/*
+ * coalition_request_terminate_internal
+ * Records a termination request. If nothing is active the coalition is marked
+ * terminated immediately and userspace is notified; otherwise that happens in
+ * coalition_remove_active() when the last activation goes away.
+ * Returns: KERN_SUCCESS; KERN_DEFAULT_SET for the default coalition; KERN_INVALID_NAME
+ *          if reaped; KERN_TERMINATED if termination was already requested or completed.
+ * Condition: Coalition must be referenced and UNLOCKED.
+ */
+kern_return_t
+coalition_request_terminate_internal(coalition_t coal)
+{
+       boolean_t send_note = FALSE;
+       uint64_t pending_id = 0;
+       uint32_t pending_flags = 0;
+
+       if (coal == default_coalition) {
+               return KERN_DEFAULT_SET;
+       }
+
+       coalition_lock(coal);
+
+       if (coal->reaped) {
+               coalition_unlock(coal);
+               return KERN_INVALID_NAME;
+       }
+
+       if (coal->termrequested || coal->terminated) {
+               coalition_unlock(coal);
+               return KERN_TERMINATED;
+       }
+
+       coal->termrequested = TRUE;
+
+       if (coal->active_count == 0) {
+               /*
+                * Already empty, so termination completes right here. The request
+                * was only just recorded, so no notification can have been sent yet.
+                */
+               assert(!coal->terminated);
+               assert(!coal->notified);
+               coal->terminated = TRUE;
+               coal->notified = TRUE;
+               send_note = TRUE;
+               pending_id = coal->id;
+       }
+
+       coalition_unlock(coal);
+
+       if (send_note) {
+               coalition_notify_user(pending_id, pending_flags);
+       }
+       return KERN_SUCCESS;
+}
+
+/*
+ * coalition_reap_internal
+ * Returns: KERN_SUCCESS; KERN_DEFAULT_SET (default coalition); KERN_TERMINATED (already reaped); KERN_FAILURE (not terminated, or still active).
+ * Condition: Coalition must be referenced and UNLOCKED.
+ */
+kern_return_t
+coalition_reap_internal(coalition_t coal)
+{
+       kern_return_t kr = KERN_SUCCESS;
+
+       if (coal == default_coalition) {
+               return KERN_DEFAULT_SET;
+       }
+       coalition_lock(coal);
+       if (coal->reaped) {
+               kr = KERN_TERMINATED;
+       } else if (!coal->terminated) {
+               kr = KERN_FAILURE;
+       } else {
+               assert(coal->termrequested);
+               if (coal->active_count > 0) {
+                       kr = KERN_FAILURE;
+               }
+       }
+       if (kr != KERN_SUCCESS) {
+               coalition_unlock(coal);
+               return kr;
+       }
+
+       coal->reaped = TRUE;
+       /* Caller, launchd, and coalitions list should each have a reference */
+       assert(coal->ref_count > 2);
+       coalition_unlock(coal);
+
+       lck_mtx_lock(&coalitions_list_lock);
+       coalition_count--;
+       queue_remove(&coalitions, coal, coalition_t, coalitions);
+       lck_mtx_unlock(&coalitions_list_lock);
+
+       /* Release the list's reference and launchd's reference; the caller keeps its own. */
+       coalition_release(coal);
+       coalition_release(coal);
+       return KERN_SUCCESS;
+}
+
+void
+coalition_init(void)    /* one-time setup of the coalition subsystem; panics if the default coalition cannot be created */
+{
+       coalition_zone = zinit(         /* zone that coalition_create_internal() allocates struct coalition from */
+                       sizeof(struct coalition),
+                       CONFIG_COALITION_MAX * sizeof(struct coalition),
+                       COALITION_CHUNK * sizeof(struct coalition),
+                       "coalitions");
+       zone_change(coalition_zone, Z_NOENCRYPT, TRUE);
+       queue_init(&coalitions);
+       /* Boot-arg override for the privilege checks; forced to 0 when it cannot be parsed. */
+       if (!PE_parse_boot_argn("unrestrict_coalition_syscalls", &unrestrict_coalition_syscalls,
+               sizeof (unrestrict_coalition_syscalls))) {
+               unrestrict_coalition_syscalls = 0;
+       }
+       /* The lock group and attributes serve the list lock here and every coalition's own lock later. */
+       lck_grp_attr_setdefault(&coalitions_lck_grp_attr);
+       lck_grp_init(&coalitions_lck_grp, "coalition", &coalitions_lck_grp_attr);
+       lck_attr_setdefault(&coalitions_lck_attr);
+       lck_mtx_init(&coalitions_list_lock, &coalitions_lck_grp, &coalitions_lck_attr);
+
+       init_task_ledgers();    /* NOTE(review): presumably prepares task_ledger_template for the ledger_instantiate() calls in this file - confirm in task.c */
+
+       kern_return_t kr = coalition_create_internal(&default_coalition, TRUE);  /* TRUE: the default coalition is privileged */
+       if (kr != KERN_SUCCESS) {
+               panic("%s: could not create default coalition: %d", __func__, kr);
+       }
+       /* "Leak" our reference to the global object: no matching coalition_release() is issued for it here. */
+}
+