+/*
+ * Translate a Mach task role (TASK_*_APPLICATION) into the matching
+ * PRIO_DARWIN_ROLE_* constant.  TASK_UNSPECIFIED, and any role without an
+ * explicit mapping, yields PRIO_DARWIN_ROLE_DEFAULT.
+ */
+int
+proc_task_role_to_darwin_role(int task_role)
+{
+	int darwin_role = PRIO_DARWIN_ROLE_DEFAULT;
+
+	switch (task_role) {
+	case TASK_FOREGROUND_APPLICATION:
+		darwin_role = PRIO_DARWIN_ROLE_UI_FOCAL;
+		break;
+	case TASK_BACKGROUND_APPLICATION:
+		darwin_role = PRIO_DARWIN_ROLE_UI_NON_FOCAL;
+		break;
+	case TASK_NONUI_APPLICATION:
+		darwin_role = PRIO_DARWIN_ROLE_NON_UI;
+		break;
+	case TASK_DEFAULT_APPLICATION:
+		darwin_role = PRIO_DARWIN_ROLE_UI;
+		break;
+	case TASK_THROTTLE_APPLICATION:
+		darwin_role = PRIO_DARWIN_ROLE_TAL_LAUNCH;
+		break;
+	case TASK_UNSPECIFIED:
+	default:
+		/* keep PRIO_DARWIN_ROLE_DEFAULT */
+		break;
+	}
+
+	return darwin_role;
+}
+
+
+/* TODO: remove this variable when interactive daemon audit period is over */
+extern boolean_t ipc_importance_interactive_receiver;
+
+/*
+ * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
+ *
+ * Parameters:
+ *   task            - task whose importance attributes and requested policy
+ *                     are being initialized
+ *   apptype         - TASK_APPTYPE_* value; selects the donor / live-donor /
+ *                     receiver / de-nap receiver marking in the switch below
+ *                     and is stored in trp_apptype unless it is
+ *                     TASK_APPTYPE_NONE
+ *   qos_clamp       - stored in trp_qos_clamp unless THREAD_QOS_UNSPECIFIED
+ *   role            - stored in trp_role unless TASK_UNSPECIFIED (see the
+ *                     CONFIG_EMBEDDED special case in the body)
+ *   portwatch_ports - optional array of ports; only consulted when apptype
+ *                     is TASK_APPTYPE_DAEMON_ADAPTIVE
+ *   portwatch_count - number of entries in portwatch_ports
+ *
+ * The requested-policy fields are written with the task lock held; the
+ * policy update is completed after the lock has been dropped.
+ *
+ * TODO: Make this function more table-driven instead of ad-hoc
+ */
+void
+proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role,
+ ipc_port_t * portwatch_ports, int portwatch_count)
+{
+ struct task_pend_token pend_token = {};
+
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+ (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
+ task_pid(task), trequested_0(task), trequested_1(task),
+ apptype, 0);
+
+ switch (apptype) {
+ case TASK_APPTYPE_APP_TAL:
+ case TASK_APPTYPE_APP_DEFAULT:
+ /* Apps become donors via the 'live-donor' flag instead of the static donor flag */
+ task_importance_mark_donor(task, FALSE);
+ task_importance_mark_live_donor(task, TRUE);
+ task_importance_mark_receiver(task, FALSE);
+#if CONFIG_EMBEDDED
+ task_importance_mark_denap_receiver(task, FALSE);
+#else
+ /* Apps are de-nap receivers on desktop for suppression behaviors */
+ task_importance_mark_denap_receiver(task, TRUE);
+#endif /* CONFIG_EMBEDDED */
+ break;
+
+ case TASK_APPTYPE_DAEMON_INTERACTIVE:
+ task_importance_mark_donor(task, TRUE);
+ task_importance_mark_live_donor(task, FALSE);
+
+ /*
+ * A boot arg controls whether interactive daemons are importance receivers.
+ * Normally, they are not. But for testing their behavior as an adaptive
+ * daemon, the boot-arg can be set.
+ *
+ * TODO: remove this when the interactive daemon audit period is over.
+ */
+ task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
+ task_importance_mark_denap_receiver(task, FALSE);
+ break;
+
+ case TASK_APPTYPE_DAEMON_STANDARD:
+ task_importance_mark_donor(task, TRUE);
+ task_importance_mark_live_donor(task, FALSE);
+ task_importance_mark_receiver(task, FALSE);
+ task_importance_mark_denap_receiver(task, FALSE);
+ break;
+
+ case TASK_APPTYPE_DAEMON_ADAPTIVE:
+ task_importance_mark_donor(task, FALSE);
+ task_importance_mark_live_donor(task, FALSE);
+ task_importance_mark_receiver(task, TRUE);
+ task_importance_mark_denap_receiver(task, FALSE);
+ break;
+
+ case TASK_APPTYPE_DAEMON_BACKGROUND:
+ task_importance_mark_donor(task, FALSE);
+ task_importance_mark_live_donor(task, FALSE);
+ task_importance_mark_receiver(task, FALSE);
+ task_importance_mark_denap_receiver(task, FALSE);
+ break;
+
+ case TASK_APPTYPE_NONE:
+ break; /* no importance marking is changed for TASK_APPTYPE_NONE */
+ }
+
+ if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
+ int portwatch_boosts = 0; /* sum of the boosts reported for each watched port */
+
+ for (int i = 0; i < portwatch_count; i++) {
+ ipc_port_t port = NULL;
+
+ if ((port = portwatch_ports[i]) != NULL) { /* NULL entries in the array are skipped */
+ int boost = 0;
+ task_add_importance_watchport(task, port, &boost);
+ portwatch_boosts += boost;
+ }
+ }
+
+ if (portwatch_boosts > 0) {
+ task_importance_hold_internal_assertion(task, portwatch_boosts); /* one call covering all accumulated boosts */
+ }
+ }
+
+ task_lock(task);
+
+ if (apptype == TASK_APPTYPE_APP_TAL) {
+ /* TAL starts off enabled by default */
+ task->requested_policy.trp_tal_enabled = 1;
+ }
+
+ if (apptype != TASK_APPTYPE_NONE) {
+ task->requested_policy.trp_apptype = apptype;
+ }
+
+#if CONFIG_EMBEDDED
+ /* Remove this after launchd starts setting it properly */
+ if (apptype == TASK_APPTYPE_APP_DEFAULT && role == TASK_UNSPECIFIED) {
+ task->requested_policy.trp_role = TASK_FOREGROUND_APPLICATION;
+ } else /* this 'else' binds to the 'if' that follows the #endif */
+#endif
+ if (role != TASK_UNSPECIFIED) {
+ task->requested_policy.trp_role = role;
+ }
+
+ if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
+ task->requested_policy.trp_qos_clamp = qos_clamp;
+ }
+
+ task_policy_update_locked(task, &pend_token);
+
+ task_unlock(task);
+
+ /* Ensure the donor bit is updated to be in sync with the new live donor status */
+ pend_token.tpt_update_live_donor = 1;
+
+ task_policy_update_complete_unlocked(task, &pend_token);
+
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+ (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
+ task_pid(task), trequested_0(task), trequested_1(task),
+ task_is_importance_receiver(task), 0);
+}
+
+/*
+ * Carry the task role over from the pre-exec task to the post-exec task:
+ * read TASK_POLICY_ROLE from old_task and apply the same value to new_task.
+ */
+void
+proc_inherit_task_role(task_t new_task, task_t old_task)
+{
+	const int inherited_role =
+	    proc_get_task_policy(old_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
+
+	proc_set_task_policy(new_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
+	    inherited_role);
+}
+
+extern void *initproc;
+
+/*
+ * Work out the QoS a task's main thread starts with.
+ *
+ * The starting point depends on the requested apptype; the task backing
+ * initproc is raised to at least THREAD_QOS_USER_INITIATED; finally a
+ * requested QoS clamp, if any, caps the result (or supplies it outright
+ * when the apptype did not pick one).
+ */
+int
+task_compute_main_thread_qos(task_t task)
+{
+	int main_qos;
+	const int clamp = task->requested_policy.trp_qos_clamp;
+
+	switch (task->requested_policy.trp_apptype) {
+	case TASK_APPTYPE_APP_TAL:
+	case TASK_APPTYPE_APP_DEFAULT:
+		main_qos = THREAD_QOS_USER_INTERACTIVE;
+		break;
+
+	case TASK_APPTYPE_DAEMON_INTERACTIVE:
+	case TASK_APPTYPE_DAEMON_STANDARD:
+	case TASK_APPTYPE_DAEMON_ADAPTIVE:
+		main_qos = THREAD_QOS_LEGACY;
+		break;
+
+	case TASK_APPTYPE_DAEMON_BACKGROUND:
+		main_qos = THREAD_QOS_BACKGROUND;
+		break;
+
+	default:
+		main_qos = THREAD_QOS_UNSPECIFIED;
+		break;
+	}
+
+	/* PID 1 gets a special case: never below USER_INITIATED */
+	if (task->bsd_info == initproc && main_qos < THREAD_QOS_USER_INITIATED) {
+		main_qos = THREAD_QOS_USER_INITIATED;
+	}
+
+	/*
+	 * Apply the clamp: it replaces an unspecified QoS, and otherwise only
+	 * takes effect when it is lower than what was computed above.
+	 */
+	if (clamp != THREAD_QOS_UNSPECIFIED &&
+	    (main_qos == THREAD_QOS_UNSPECIFIED || clamp < main_qos)) {
+		main_qos = clamp;
+	}
+
+	return main_qos;
+}
+
+
+/*
+ * Lets process_policy check, before attempting a set, whether the task's
+ * requested apptype is TASK_APPTYPE_APP_TAL.
+ */
+boolean_t
+proc_task_is_tal(task_t task)
+{
+	if (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Return the apptype recorded in the task's requested policy. */
+int
+task_get_apptype(task_t task)
+{
+	const int apptype = task->requested_policy.trp_apptype;
+
+	return apptype;
+}
+
+/* TRUE when the task's requested apptype is one of the TASK_APPTYPE_DAEMON_* values. */
+boolean_t
+task_is_daemon(task_t task)
+{
+	boolean_t is_daemon = FALSE;
+
+	switch (task->requested_policy.trp_apptype) {
+	case TASK_APPTYPE_DAEMON_INTERACTIVE:
+	case TASK_APPTYPE_DAEMON_STANDARD:
+	case TASK_APPTYPE_DAEMON_ADAPTIVE:
+	case TASK_APPTYPE_DAEMON_BACKGROUND:
+		is_daemon = TRUE;
+		break;
+	default:
+		break;
+	}
+
+	return is_daemon;
+}
+
+/* TRUE when the task's requested apptype is TASK_APPTYPE_APP_DEFAULT or TASK_APPTYPE_APP_TAL. */
+boolean_t
+task_is_app(task_t task)
+{
+	boolean_t is_app = FALSE;
+
+	switch (task->requested_policy.trp_apptype) {
+	case TASK_APPTYPE_APP_DEFAULT:
+	case TASK_APPTYPE_APP_TAL:
+		is_app = TRUE;
+		break;
+	default:
+		break;
+	}
+
+	return is_app;
+}
+
+/*
+ * for telemetry
+ *
+ * Returns the task's effective TASK_POLICY_LATENCY_QOS value after it has
+ * been passed through qos_latency_policy_package().
+ */
+integer_t
+task_grab_latency_qos(task_t task)
+{
+ return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
+}
+
+/*
+ * Report the task's darwin background / apptype state to libproc.
+ *
+ * The relevant PROC_FLAG_* bits are OR-ed into *flagsp; bits that are
+ * already set there are preserved.  Always returns 0.
+ */
+int
+proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
+{
+	/* externally and internally requested darwinbg */
+	if (task->requested_policy.trp_ext_darwinbg) {
+		*flagsp |= PROC_FLAG_EXT_DARWINBG;
+	}
+	if (task->requested_policy.trp_int_darwinbg) {
+		*flagsp |= PROC_FLAG_DARWINBG;
+	}
+
+#if CONFIG_EMBEDDED
+	switch (task->requested_policy.trp_apptype) {
+	case TASK_APPTYPE_DAEMON_BACKGROUND:
+		*flagsp |= PROC_FLAG_IOS_APPLEDAEMON;
+		break;
+	case TASK_APPTYPE_DAEMON_ADAPTIVE:
+		*flagsp |= PROC_FLAG_IOS_IMPPROMOTION;
+		break;
+	default:
+		break;
+	}
+#endif /* CONFIG_EMBEDDED */
+
+	/* apptype-derived flags */
+	switch (task->requested_policy.trp_apptype) {
+	case TASK_APPTYPE_APP_DEFAULT:
+	case TASK_APPTYPE_APP_TAL:
+		*flagsp |= PROC_FLAG_APPLICATION;
+		break;
+	case TASK_APPTYPE_DAEMON_ADAPTIVE:
+		*flagsp |= PROC_FLAG_ADAPTIVE;
+		if (task->requested_policy.trp_boosted == 1) {
+			*flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (task_is_importance_donor(task)) {
+		*flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
+	}
+
+	if (task->effective_policy.tep_sup_active) {
+		*flagsp |= PROC_FLAG_SUPPRESSED;
+	}
+
+	return 0;
+}
+
+/*
+ * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
+ * The current scheme packs as much data into a single tracepoint as it can.
+ *
+ * Each task/thread requested/effective structure is 64 bits in size. Any
+ * given tracepoint will emit either requested or effective data, but not both.
+ *
+ * A tracepoint may emit any of task, thread, or task & thread data.
+ *
+ * The type of data emitted varies with pointer size. Where possible, both
+ * task and thread data are emitted. In LP32 systems, the first and second
+ * halves of either the task or thread data is emitted.
+ *
+ * The code uses uintptr_t array indexes instead of high/low to avoid
+ * confusion WRT big vs little endian.
+ *
+ * The truth table for the tracepoint data functions is below, and has the
+ * following invariants:
+ *
+ * 1) task and thread are uintptr_t*
+ * 2) task may never be NULL
+ *
+ *
+ * LP32 LP64
+ * trequested_0(task, NULL) task[0] task[0]
+ * trequested_1(task, NULL) task[1] NULL
+ * trequested_0(task, thread) thread[0] task[0]
+ * trequested_1(task, thread) thread[1] thread[0]
+ *
+ * Basically, you get a full task or thread on LP32, and both on LP64.
+ *
+ * The uintptr_t munging here is squicky enough to deserve a comment.
+ *
+ * The variables we are accessing are laid out in memory like this:
+ *
+ * [ LP64 uintptr_t 0 ]
+ * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
+ *
+ * 1 2 3 4 5 6 7 8
+ *
+ */
+
+/* First pointer-sized word of the task's requested policy, for tracepoints. */
+static uintptr_t
+trequested_0(task_t task)
+{
+	static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
+
+	const uintptr_t *words = (const uintptr_t *)&task->requested_policy;
+
+	return words[0];
+}
+
+/*
+ * Second pointer-sized word of the task's requested policy, for tracepoints.
+ * On LP64 there is no second word to report, so 0 is returned.
+ */
+static uintptr_t
+trequested_1(task_t task)
+{
+#if defined(__LP64__)
+	(void)task;
+	return 0;
+#else
+	const uintptr_t *words = (const uintptr_t *)&task->requested_policy;
+
+	return words[1];
+#endif
+}
+
+/* First pointer-sized word of the task's effective policy, for tracepoints. */
+static uintptr_t
+teffective_0(task_t task)
+{
+	const uintptr_t *words = (const uintptr_t *)&task->effective_policy;
+
+	return words[0];
+}
+
+/*
+ * Second pointer-sized word of the task's effective policy, for tracepoints.
+ * On LP64 there is no second word to report, so 0 is returned.
+ */
+static uintptr_t
+teffective_1(task_t task)
+{
+#if defined(__LP64__)
+	(void)task;
+	return 0;
+#else
+	const uintptr_t *words = (const uintptr_t *)&task->effective_policy;
+
+	return words[1];
+#endif
+}
+
+/* Return the first 32 bits of the pend token as a raw value, for tracepoints. */
+uint32_t
+tpending(task_pend_token_t pend_token)
+{
+	const uint32_t *raw = (const uint32_t *)(void *)pend_token;
+
+	return *raw;
+}
+
+/*
+ * Pack a snapshot of the task's requested policy into a 64-bit value built
+ * from the POLICY_REQ_* flag and shift constants.
+ *
+ * Single-bit fields contribute their POLICY_REQ_* flag when set; multi-bit
+ * fields are widened to 64 bits and shifted into position (a zero field
+ * contributes nothing).
+ */
+uint64_t
+task_requested_bitfield(task_t task)
+{
+	const struct task_requested_policy req = task->requested_policy;
+	uint64_t packed = 0;
+
+	/* darwinbg, I/O tiering and termination */
+	if (req.trp_int_darwinbg) {
+		packed |= POLICY_REQ_INT_DARWIN_BG;
+	}
+	if (req.trp_ext_darwinbg) {
+		packed |= POLICY_REQ_EXT_DARWIN_BG;
+	}
+	packed |= ((uint64_t)req.trp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT;
+	packed |= ((uint64_t)req.trp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT;
+	if (req.trp_int_iopassive) {
+		packed |= POLICY_REQ_INT_PASSIVE_IO;
+	}
+	if (req.trp_ext_iopassive) {
+		packed |= POLICY_REQ_EXT_PASSIVE_IO;
+	}
+	packed |= ((uint64_t)req.trp_bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT;
+	if (req.trp_terminated) {
+		packed |= POLICY_REQ_TERMINATED;
+	}
+
+	/* boost, TAL, apptype and role */
+	if (req.trp_boosted) {
+		packed |= POLICY_REQ_BOOSTED;
+	}
+	if (req.trp_tal_enabled) {
+		packed |= POLICY_REQ_TAL_ENABLED;
+	}
+	packed |= ((uint64_t)req.trp_apptype) << POLICY_REQ_APPTYPE_SHIFT;
+	packed |= ((uint64_t)req.trp_role) << POLICY_REQ_ROLE_SHIFT;
+
+	/* suppression settings */
+	if (req.trp_sup_active) {
+		packed |= POLICY_REQ_SUP_ACTIVE;
+	}
+	if (req.trp_sup_lowpri_cpu) {
+		packed |= POLICY_REQ_SUP_LOWPRI_CPU;
+	}
+	if (req.trp_sup_cpu) {
+		packed |= POLICY_REQ_SUP_CPU;
+	}
+	packed |= ((uint64_t)req.trp_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT;
+	packed |= ((uint64_t)req.trp_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT;
+	if (req.trp_sup_disk) {
+		packed |= POLICY_REQ_SUP_DISK_THROTTLE;
+	}
+	if (req.trp_sup_bg_sockets) {
+		packed |= POLICY_REQ_SUP_BG_SOCKETS;
+	}
+
+	/* latency / throughput QoS, SFI and QoS clamp */
+	packed |= ((uint64_t)req.trp_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT;
+	packed |= ((uint64_t)req.trp_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT;
+	packed |= ((uint64_t)req.trp_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT;
+	packed |= ((uint64_t)req.trp_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT;
+	if (req.trp_sfi_managed) {
+		packed |= POLICY_REQ_SFI_MANAGED;
+	}
+	packed |= ((uint64_t)req.trp_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT;
+
+	return packed;
+}
+
+/*
+ * Pack a snapshot of the task's effective policy into a 64-bit value built
+ * from the POLICY_EFF_* flag and shift constants.
+ *
+ * Single-bit fields contribute their POLICY_EFF_* flag when set; multi-bit
+ * fields are widened to 64 bits and shifted into position (a zero field
+ * contributes nothing).
+ */
+uint64_t
+task_effective_bitfield(task_t task)
+{
+	const struct task_effective_policy eff = task->effective_policy;
+	uint64_t packed = 0;
+
+	packed |= ((uint64_t)eff.tep_io_tier) << POLICY_EFF_IO_TIER_SHIFT;
+	if (eff.tep_io_passive) {
+		packed |= POLICY_EFF_IO_PASSIVE;
+	}
+	if (eff.tep_darwinbg) {
+		packed |= POLICY_EFF_DARWIN_BG;
+	}
+	if (eff.tep_lowpri_cpu) {
+		packed |= POLICY_EFF_LOWPRI_CPU;
+	}
+	if (eff.tep_terminated) {
+		packed |= POLICY_EFF_TERMINATED;
+	}
+	if (eff.tep_all_sockets_bg) {
+		packed |= POLICY_EFF_ALL_SOCKETS_BG;
+	}
+	if (eff.tep_new_sockets_bg) {
+		packed |= POLICY_EFF_NEW_SOCKETS_BG;
+	}
+	packed |= ((uint64_t)eff.tep_bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT;
+	if (eff.tep_qos_ui_is_urgent) {
+		packed |= POLICY_EFF_QOS_UI_IS_URGENT;
+	}
+
+	if (eff.tep_tal_engaged) {
+		packed |= POLICY_EFF_TAL_ENGAGED;
+	}
+	if (eff.tep_watchers_bg) {
+		packed |= POLICY_EFF_WATCHERS_BG;
+	}
+	if (eff.tep_sup_active) {
+		packed |= POLICY_EFF_SUP_ACTIVE;
+	}
+	if (eff.tep_suppressed_cpu) {
+		packed |= POLICY_EFF_SUP_CPU;
+	}
+	packed |= ((uint64_t)eff.tep_role) << POLICY_EFF_ROLE_SHIFT;
+	packed |= ((uint64_t)eff.tep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT;
+	packed |= ((uint64_t)eff.tep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT;
+	if (eff.tep_sfi_managed) {
+		packed |= POLICY_EFF_SFI_MANAGED;
+	}
+	packed |= ((uint64_t)eff.tep_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT;
+
+	return packed;
+}
+
+
+/*
+ * Resource usage and CPU related routines
+ */
+
+/*
+ * Fetch the task's current CPU usage limit.
+ *
+ * The percentage, interval and deadline are filled in by task_get_cpuusage()
+ * under the task lock.  The scope it reports is then mapped back to a
+ * resource policy:
+ *
+ *   TASK_RUSECPU_FLAGS_PERTHR_LIMIT -> TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC
+ *   TASK_RUSECPU_FLAGS_PROC_LIMIT   -> TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE
+ *   TASK_RUSECPU_FLAGS_DEADLINE     -> TASK_POLICY_RESOURCE_ATTRIBUTE_NONE
+ *
+ * For any other scope *policyp is left untouched.  Returns the value
+ * returned by task_get_cpuusage().
+ */
+int
+proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
+{
+	int result;
+	int active_scope;
+
+	task_lock(task);
+	result = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &active_scope);
+	task_unlock(task);
+
+	if (active_scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
+		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
+		return result;
+	}
+
+	if (active_scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
+		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
+		return result;
+	}
+
+	if (active_scope == TASK_RUSECPU_FLAGS_DEADLINE) {
+		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
+	}
+
+	return result;
+}
+
+/*
+ * Configure the default CPU usage monitor parameters.
+ *
+ * For tasks which have this mechanism activated: if any thread in the
+ * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
+ *
+ * Sets proc_max_cpumon_percentage (capped at 100) and
+ * proc_max_cpumon_interval (converted from seconds to nanoseconds), and
+ * reads the task_policy_suppression_disable boot-arg.
+ */
+void
+proc_init_cpumon_params(void)
+{
+ /*
+ * The max CPU percentage can be configured via the boot-args and
+ * a key in the device tree. The boot-args are honored first, then the
+ * device tree.
+ *
+ * If neither supplies a value, DEFAULT_CPUMON_PERCENTAGE is used.
+ */
+ if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
+ sizeof (proc_max_cpumon_percentage)))
+ {
+ uint64_t max_percentage = 0ULL;
+
+ if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
+ sizeof(max_percentage)))
+ {
+ max_percentage = DEFAULT_CPUMON_PERCENTAGE;
+ }
+
+ assert(max_percentage <= UINT8_MAX); /* the value is narrowed to uint8_t on the next line */
+ proc_max_cpumon_percentage = (uint8_t) max_percentage;
+ }
+
+ if (proc_max_cpumon_percentage > 100) {
+ proc_max_cpumon_percentage = 100; /* applies to the boot-arg path as well */
+ }
+
+ /*
+ * The interval should be specified in seconds.
+ *
+ * Like the max CPU percentage, the max CPU interval can be configured
+ * via boot-args and the device tree.
+ *
+ * If neither supplies a value, DEFAULT_CPUMON_INTERVAL is used.
+ */
+ if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
+ sizeof (proc_max_cpumon_interval)))
+ {
+ if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
+ sizeof(proc_max_cpumon_interval)))
+ {
+ proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
+ }
+ }
+
+ proc_max_cpumon_interval *= NSEC_PER_SEC; /* seconds -> nanoseconds */
+
+ /* TEMPORARY boot arg to control App suppression */
+ PE_parse_boot_argn("task_policy_suppression_disable",
+ &task_policy_suppression_disable,
+ sizeof(task_policy_suppression_disable));
+}
+
+/*
+ * Currently supported configurations for CPU limits.
+ *
+ * Policy | Deadline-based CPU limit | Percentage-based CPU limit
+ * -------------------------------------+--------------------------+------------------------------
+ * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only
+ * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP
+ * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP
+ * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP
+ * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only
+ *
+ * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
+ * after the specified amount of wallclock time has elapsed.
+ *
+ * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
+ * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
+ * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
+ * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
+ *
+ * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
+ * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
+ * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
+ * but the potential consumer of the API at the time was insisting on wallclock time instead.
+ *
+ * Currently, requesting notification via an exception is the only way to get per-thread scope for a
+ * CPU limit. All other types of notifications force task-wide scope for the limit.
+ *
+ * Returns:
+ *   ENOTSUP - the policy was combined with a limit type the table above rejects
+ *   EINVAL  - the policy is not one of the TASK_POLICY_RESOURCE_ATTRIBUTE_* values handled below
+ *   otherwise the value returned by task_set_cpuusage()
+ *
+ * The policy is recorded in task->policy_ru_cpu when the task is the current task and in
+ * task->policy_ru_cpu_ext otherwise; both that store and task_set_cpuusage() run under the task lock.
+ */
+int
+proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
+ int cpumon_entitled)
+{
+ int error = 0;
+ int scope;
+
+ /*
+ * Enforce the matrix of supported configurations for policy, percentage, and deadline.
+ */
+ switch (policy) {
+ // If no policy is explicitly given, the default is to throttle.
+ case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
+ case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
+ if (deadline != 0)
+ return (ENOTSUP);
+ scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
+ break;
+ case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
+ case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
+ case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
+ if (percentage != 0)
+ return (ENOTSUP);
+ scope = TASK_RUSECPU_FLAGS_DEADLINE;
+ break;
+ case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
+ if (deadline != 0)
+ return (ENOTSUP);
+ scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
+#ifdef CONFIG_NOMONITORS
+ return (error); /* error is still 0 here: report success without configuring anything */
+#endif /* CONFIG_NOMONITORS */
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ task_lock(task);
+ if (task != current_task()) {
+ task->policy_ru_cpu_ext = policy; /* set on behalf of another task */
+ } else {
+ task->policy_ru_cpu = policy; /* set by the task on itself */
+ }
+ error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
+ task_unlock(task);
+ return(error);
+}
+
+/* TODO: get rid of these */
+#define TASK_POLICY_CPU_RESOURCE_USAGE 0
+#define TASK_POLICY_WIREDMEM_RESOURCE_USAGE 1
+#define TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE 2
+#define TASK_POLICY_DISK_RESOURCE_USAGE 3
+#define TASK_POLICY_NETWORK_RESOURCE_USAGE 4
+#define TASK_POLICY_POWER_RESOURCE_USAGE 5
+
+#define TASK_POLICY_RESOURCE_USAGE_COUNT 6
+
+/*
+ * Remove the CPU usage limit from a task.
+ *
+ * Resets the recorded policy (policy_ru_cpu for the current task,
+ * policy_ru_cpu_ext otherwise) to TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT,
+ * clears the limit via task_clear_cpuusage_locked(), resets any applied
+ * external action, and -- if task->applied_ru_cpu names an action -- asks
+ * the BSD side to restore it after the task lock has been dropped.
+ *
+ * Returns the value returned by task_clear_cpuusage_locked().
+ */
+int
+proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
+{
+	int result;
+	int applied_action;
+	void *proc;
+
+	task_lock(task);
+
+	if (task == current_task()) {
+		task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
+	} else {
+		task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
+	}
+
+	result = task_clear_cpuusage_locked(task, cpumon_entitled);
+	if (result != 0) {
+		task_unlock(task);
+		return result;
+	}
+
+	applied_action = task->applied_ru_cpu;
+
+	/* reset the externally applied action, if there is one */
+	if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
+	}
+
+	if (applied_action == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		task_unlock(task);
+		return result;
+	}
+
+	/* capture bsd_info while locked; the restore call runs unlocked */
+	proc = task->bsd_info;
+	task_unlock(task);
+	proc_restore_resource_actions(proc, TASK_POLICY_CPU_RESOURCE_USAGE, applied_action);
+
+	return result;
+}
+
+/*
+ * Apply the action associated with a resource limit.
+ *
+ * Only TASK_POLICY_CPU_RESOURCE_USAGE does any work; the other known
+ * resource types return 0 immediately and an unknown type returns 1.
+ * For CPU, the externally requested policy is latched as the applied
+ * action if none is applied yet, and the BSD side is asked to carry the
+ * action out once the task lock has been dropped.
+ */
+static int
+task_apply_resource_actions(task_t task, int type)
+{
+	int cpu_action;
+	void *proc;
+
+	switch (type) {
+	case TASK_POLICY_CPU_RESOURCE_USAGE:
+		break;
+	case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
+	case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
+	case TASK_POLICY_DISK_RESOURCE_USAGE:
+	case TASK_POLICY_NETWORK_RESOURCE_USAGE:
+	case TASK_POLICY_POWER_RESOURCE_USAGE:
+		return 0;
+	default:
+		return 1;
+	}
+
+	/* only cpu actions for now */
+	task_lock(task);
+
+	if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		/* nothing applied yet: latch the requested external policy */
+		task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
+	}
+	cpu_action = task->applied_ru_cpu_ext;
+
+	if (cpu_action == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		task_unlock(task);
+		return 0;
+	}
+
+	proc = task->bsd_info;
+	task_unlock(task);
+	proc_apply_resource_actions(proc, TASK_POLICY_CPU_RESOURCE_USAGE, cpu_action);
+
+	return 0;
+}
+
+/*
+ * Report the task's active CPU limit and its scope.
+ *
+ * XXX The get/set API only exposes one limit at a time even though several
+ * can be active simultaneously.  The per-thread limit takes precedence,
+ * then the proc-wide limit, then the deadline; while a higher-precedence
+ * limit is active the others are not visible through this call.  Passing
+ * in the scope of interest would fix that, but nothing needs it yet.
+ *
+ * Outputs default to 0 (including *scope when no limit is active).
+ * Always returns 0.
+ */
+static int
+task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
+{
+	*percentagep = 0;
+	*intervalp = 0;
+	*deadlinep = 0;
+	*scope = 0;
+
+	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
+		*scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
+		*percentagep = task->rusage_cpu_perthr_percentage;
+		*intervalp = task->rusage_cpu_perthr_interval;
+		return 0;
+	}
+
+	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
+		*scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
+		*percentagep = task->rusage_cpu_percentage;
+		*intervalp = task->rusage_cpu_interval;
+		return 0;
+	}
+
+	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
+		*scope = TASK_RUSECPU_FLAGS_DEADLINE;
+		*deadlinep = task->rusage_cpu_deadline;
+	}
+
+	return 0;
+}
+
+/*
+ * Suspend the CPU usage monitor for the task. Return value indicates
+ * if the mechanism was actually enabled.
+ *
+ * The task lock must be held by the caller (asserted below).
+ *
+ * Returns KERN_INVALID_ARGUMENT, without changing anything, when the
+ * per-thread limit flag is not set; otherwise clears the per-thread limit
+ * and fatal-cpumon flags, calls act_set_astledger() on every thread of the
+ * task, and returns KERN_SUCCESS.  The configured percentage and interval
+ * are left in place.
+ */
+int
+task_suspend_cpumon(task_t task)
+{
+ thread_t thread;
+
+ task_lock_assert_owned(task);
+
+ if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+#if CONFIG_TELEMETRY
+ /*
+ * Disable task-wide telemetry if it was ever enabled by the CPU usage
+ * monitor's warning zone.
+ */
+ telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
+#endif
+
+ /*
+ * Suspend monitoring for the task, and propagate that change to each thread.
+ */
+ task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
+ queue_iterate(&task->threads, thread, thread_t, task_threads) {
+ act_set_astledger(thread);
+ }
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Remove all traces of the CPU monitor: suspend it and then forget the
+ * per-thread limit parameters.  The task lock must be held.
+ *
+ * If the monitor could not be suspended, that status is returned and the
+ * parameters are left alone.
+ */
+int
+task_disable_cpumon(task_t task)
+{
+	int status;
+
+	task_lock_assert_owned(task);
+
+	status = task_suspend_cpumon(task);
+	if (status != 0) {
+		return status;
+	}
+
+	/* Once we clear these values, the monitor can't be resumed */
+	task->rusage_cpu_perthr_percentage = 0;
+	task->rusage_cpu_perthr_interval = 0;
+
+	return KERN_SUCCESS;
+}
+
+
+/*
+ * Turn the per-thread CPU usage monitor on for a task whose limit
+ * parameters are already configured.  The task lock must be held.
+ *
+ * Fails with KERN_INVALID_ARGUMENT when either the per-thread percentage
+ * or the per-thread interval is zero.
+ */
+static int
+task_enable_cpumon_locked(task_t task)
+{
+	thread_t thr;
+
+	task_lock_assert_owned(task);
+
+	if (task->rusage_cpu_perthr_percentage == 0) {
+		return KERN_INVALID_ARGUMENT;
+	}
+	if (task->rusage_cpu_perthr_interval == 0) {
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	/* flag the limit as active and propagate that to each thread */
+	task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
+	queue_iterate(&task->threads, thr, thread_t, task_threads) {
+		act_set_astledger(thr);
+	}
+
+	return KERN_SUCCESS;
+}
+
+/*
+ * Re-enable the CPU usage monitor for a task, taking the task lock around
+ * the operation.  Returns EINVAL for a null task, otherwise the status
+ * reported by task_enable_cpumon_locked().
+ */
+int
+task_resume_cpumon(task_t task)
+{
+	kern_return_t status;
+
+	if (task == TASK_NULL) {
+		return EINVAL;
+	}
+
+	task_lock(task);
+	status = task_enable_cpumon_locked(task);
+	task_unlock(task);
+
+	return status;
+}
+
+
+/* duplicate values from bsd/sys/process_policy.h */
+#define PROC_POLICY_CPUMON_DISABLE 0xFF
+#define PROC_POLICY_CPUMON_DEFAULTS 0xFE
+
+static int
+task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
+{ /*
+ * Install a CPU usage limit on the task. The caller must hold the
+ * task lock (asserted below).
+ *
+ * percentage != 0 with per-thread scope: configure and enable the
+ * CPU usage monitor (or disable it, see PROC_POLICY_CPUMON_DISABLE).
+ * percentage != 0 with proc-wide scope: program the cpu_time ledger.
+ * deadline != 0: arm a thread call that runs task_action_cpuusage().
+ *
+ * The task lock is dropped and re-taken on the deadline path when an
+ * existing thread call has to be cancelled. Always returns 0.
+ */
+ uint64_t abstime = 0;
+ uint64_t limittime = 0;
+
+ lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
+
+ /* By default, refill once per second */
+ if (interval == 0)
+ interval = NSEC_PER_SEC;
+
+ if (percentage != 0) {
+ if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
+ boolean_t warn = FALSE; /* set when the request had to be overridden for lack of entitlement */
+
+ /*
+ * A per-thread CPU limit on a task generates an exception
+ * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
+ * exceeds the limit.
+ */
+
+ if (percentage == PROC_POLICY_CPUMON_DISABLE) {
+ if (cpumon_entitled) {
+ /* 25095698 - task_disable_cpumon() should be reliable */
+ task_disable_cpumon(task);
+ return 0;
+ }
+
+ /*
+ * This task wishes to disable the CPU usage monitor, but it's
+ * missing the required entitlement:
+ * com.apple.private.kernel.override-cpumon
+ *
+ * Instead, treat this as a request to reset its params
+ * back to the defaults.
+ */
+ warn = TRUE;
+ percentage = PROC_POLICY_CPUMON_DEFAULTS;
+ }
+
+ if (percentage == PROC_POLICY_CPUMON_DEFAULTS) {
+ percentage = proc_max_cpumon_percentage;
+ interval = proc_max_cpumon_interval;
+ }
+
+ if (percentage > 100) {
+ percentage = 100;
+ }
+
+ /*
+ * Passing in an interval of -1 means either:
+ * - Leave the interval as-is, if there's already a per-thread
+ * limit configured
+ * - Use the system default.
+ */
+ if (interval == -1ULL) {
+ if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
+ interval = task->rusage_cpu_perthr_interval;
+ } else {
+ interval = proc_max_cpumon_interval;
+ }
+ }
+
+ /*
+ * Enforce global caps on CPU usage monitor here if the process is not
+ * entitled to escape the global caps.
+ */
+ if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
+ warn = TRUE;
+ percentage = proc_max_cpumon_percentage;
+ }
+
+ if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
+ warn = TRUE;
+ interval = proc_max_cpumon_interval;
+ }
+
+ if (warn) {
+ int pid = 0;
+ const char *procname = "unknown";
+
+#ifdef MACH_BSD
+ pid = proc_selfpid(); /* the log names the calling process, which may differ from 'task' */
+ if (current_task()->bsd_info != NULL) {
+ procname = proc_name_address(current_task()->bsd_info);
+ }
+#endif
+
+ printf("process %s[%d] denied attempt to escape CPU monitor"
+ " (missing required entitlement).\n", procname, pid);
+ }
+
+ /* configure the limit values */
+ task->rusage_cpu_perthr_percentage = percentage;
+ task->rusage_cpu_perthr_interval = interval;
+
+ /* and enable the CPU monitor */
+ (void)task_enable_cpumon_locked(task);
+ } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
+ /*
+ * Currently, a proc-wide CPU limit always blocks if the limit is
+ * exceeded (LEDGER_ACTION_BLOCK).
+ */
+ task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
+ task->rusage_cpu_percentage = percentage;
+ task->rusage_cpu_interval = interval;
+
+ limittime = (interval * percentage) / 100; /* CPU time allowed per interval, in nanoseconds */
+ nanoseconds_to_absolutetime(limittime, &abstime);
+
+ ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
+ ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
+ ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
+ }
+ }
+
+ if (deadline != 0) {
+ assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
+
+ /*
+ * if already in use, cancel and wait for it to cleanout
+ *
+ * NOTE(review): task->rusage_cpu_callt is re-read for the cancel
+ * call after the task lock has been dropped -- confirm nothing can
+ * clear or free it in that window.
+ */
+ if (task->rusage_cpu_callt != NULL) {
+ task_unlock(task);
+ thread_call_cancel_wait(task->rusage_cpu_callt);
+ task_lock(task);
+ }
+ if (task->rusage_cpu_callt == NULL) {
+ task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
+ }
+ /* setup callout */
+ if (task->rusage_cpu_callt != 0) {
+ uint64_t save_abstime = 0;
+
+ task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
+ task->rusage_cpu_deadline = deadline;
+
+ nanoseconds_to_absolutetime(deadline, &abstime);
+ save_abstime = abstime;
+ clock_absolutetime_interval_to_deadline(save_abstime, &abstime); /* interval from now -> absolute deadline */
+ thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
+ }
+ }
+
+ return(0);
+}
+
+/*
+ * Locking wrapper around task_clear_cpuusage_locked(): takes the task lock,
+ * clears the CPU usage limits, releases the lock and passes the result on.
+ */
+int
+task_clear_cpuusage(task_t task, int cpumon_entitled)
+{
+	int status;
+
+	task_lock(task);
+	status = task_clear_cpuusage_locked(task, cpumon_entitled);
+	task_unlock(task);
+
+	return status;
+}
+
+/*
+ * Clear the task's CPU usage limits.  Called with the task lock held.
+ *
+ *  - A proc-wide percentage limit is removed and the cpu_time ledger limit
+ *    is set back to LEDGER_LIMIT_INFINITY.
+ *  - The CPU usage monitor is disabled, but only for entitled callers.
+ *  - A deadline limit is cancelled; the task lock is dropped while waiting
+ *    for the thread call to be cancelled and freed, then re-taken.
+ *
+ * Always returns 0.
+ */
+static int
+task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
+{
+	thread_call_t pending_call;
+
+	/* cancel percentage handling if set */
+	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
+		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
+		ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
+		task->rusage_cpu_percentage = 0;
+		task->rusage_cpu_interval = 0;
+	}
+
+	/* disable the CPU usage monitor (entitled callers only) */
+	if (cpumon_entitled) {
+		task_disable_cpumon(task);
+	}
+
+	/* cancel deadline handling if set */
+	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) == 0) {
+		return 0;
+	}
+	task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
+
+	pending_call = task->rusage_cpu_callt;
+	if (pending_call == 0) {
+		return 0;
+	}
+
+	/* detach the call from the task before dropping the lock */
+	task->rusage_cpu_callt = NULL;
+	task->rusage_cpu_deadline = 0;
+
+	task_unlock(task);
+	thread_call_cancel_wait(pending_call);
+	thread_call_free(pending_call);
+	task_lock(task);
+
+	return 0;
+}
+
+/*
+ * Thread-call callback used to enforce a resource-usage action once its
+ * criteria have been met: param0 carries the task, and the CPU resource
+ * action is applied to it.  The result of applying it is ignored.
+ */
+static void
+task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
+{
+	(void)task_apply_resource_actions((task_t)param0, TASK_POLICY_CPU_RESOURCE_USAGE);
+}
+
+
+/*
+ * Routines for taskwatch and pidbind
+ */
+
+#if CONFIG_EMBEDDED
+
+lck_mtx_t task_watch_mtx;
+
+/* Initialize the global task_watch_mtx using the task lock group and attributes. */
+void
+task_watch_init(void)
+{
+	lck_mtx_init(&task_watch_mtx,
+	    &task_lck_grp,
+	    &task_lck_attr);
+}
+
+/* Acquire the global task_watch_mtx. */
+static void
+task_watch_lock(void)
+{
+	lck_mtx_t *watch_mtx = &task_watch_mtx;
+
+	lck_mtx_lock(watch_mtx);
+}
+
+/* Release the global task_watch_mtx. */
+static void
+task_watch_unlock(void)
+{
+	lck_mtx_t *watch_mtx = &task_watch_mtx;
+
+	lck_mtx_unlock(watch_mtx);
+}
+
+/*
+ * Append a watcher record to the task's task_watchers queue and bump the
+ * watcher count.  The "_locked" suffix suggests the caller holds the
+ * taskwatch lock -- NOTE(review): confirm against callers.
+ */
+static void
+add_taskwatch_locked(task_t task, task_watch_t * twp)
+{
+	queue_enter(&task->task_watchers, twp, task_watch_t *, tw_links);
+	task->num_taskwatchers += 1;
+}
+
+/*
+ * Unlink a watcher record from the task's task_watchers queue and drop the
+ * watcher count.  The "_locked" suffix suggests the caller holds the
+ * taskwatch lock -- NOTE(review): confirm against callers.
+ */
+static void
+remove_taskwatch_locked(task_t task, task_watch_t * twp)
+{
+	queue_remove(&task->task_watchers, twp, task_watch_t *, tw_links);
+	task->num_taskwatchers -= 1;
+}
+
+
+/*
+ * Bind or unbind a thread of curtask to/from the app state of target_task.
+ *
+ * curtask:     task in which the thread identified by tid is looked up.
+ * tid:         id of the thread to bind/unbind.
+ * target_task: task whose TASK_POLICY_WATCHERS_BG state the thread follows
+ *              (only used when binding).
+ * bind:        non-zero to bind, zero to unbind.
+ *
+ * Returns 0 on success (unbinding a thread that is not bound is also
+ * success), ESRCH if the thread is not found or target_task is no longer
+ * active, ENOMEM if the watch record cannot be allocated, and EBUSY if the
+ * thread is already bound.
+ */
+int
+proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind)
+{
+	thread_t target_thread = NULL;
+	int ret = 0, setbg = 0;
+	task_watch_t *twp = NULL;
+	task_t task = TASK_NULL;
+
+	target_thread = task_findtid(curtask, tid);
+	if (target_thread == NULL)
+		return ESRCH;
+	/* holds thread reference */
+
+	if (bind != 0) {
+		/* task is still active ? */
+		task_lock(target_task);
+		if (target_task->active == 0) {
+			task_unlock(target_task);
+			ret = ESRCH;
+			goto out;
+		}
+		task_unlock(target_task);
+
+		twp = (task_watch_t *)kalloc(sizeof(task_watch_t));
+		if (twp == NULL) {
+			/*
+			 * The taskwatch mutex has not been taken yet at this
+			 * point, so there is nothing to unlock here.
+			 */
+			ret = ENOMEM;
+			goto out;
+		}
+
+		bzero(twp, sizeof(task_watch_t));
+
+		task_watch_lock();
+
+		if (target_thread->taskwatch != NULL){
+			/* already bound to another task */
+			task_watch_unlock();
+
+			kfree(twp, sizeof(task_watch_t));
+			ret = EBUSY;
+			goto out;
+		}
+
+		task_reference(target_task);
+
+		setbg = proc_get_effective_task_policy(target_task, TASK_POLICY_WATCHERS_BG);
+
+		twp->tw_task = target_task;		/* holds the task reference */
+		twp->tw_thread = target_thread;		/* holds the thread reference */
+		twp->tw_state = setbg;
+		twp->tw_importance = target_thread->importance;
+
+		add_taskwatch_locked(target_task, twp);
+
+		target_thread->taskwatch = twp;
+
+		task_watch_unlock();
+
+		if (setbg)
+			set_thread_appbg(target_thread, setbg, INT_MIN);
+
+		/*
+		 * Retain the thread reference as it is in twp; clearing the
+		 * local means the thread_deallocate() at "out" is handed NULL.
+		 */
+		target_thread = NULL;
+	} else {
+		/* unbind */
+		task_watch_lock();
+		if ((twp = target_thread->taskwatch) != NULL) {
+			task = twp->tw_task;
+			target_thread->taskwatch = NULL;
+			remove_taskwatch_locked(task, twp);
+
+			task_watch_unlock();
+
+			task_deallocate(task);			/* drop task ref in twp */
+			set_thread_appbg(target_thread, 0, twp->tw_importance);
+			thread_deallocate(target_thread);	/* drop thread ref in twp */
+			kfree(twp, sizeof(task_watch_t));
+		} else {
+			task_watch_unlock();
+			ret = 0;		/* return success if it is not already bound */
+			goto out;
+		}
+	}
+out:
+	thread_deallocate(target_thread);	/* drop thread ref acquired in this routine */
+	return(ret);
+}
+
+/*
+ * Enable (setbg != 0) or disable (setbg == 0) the TASK_POLICY_PIDBIND_BG
+ * attribute on thread. The importance argument is accepted but unused.
+ */
+static void
+set_thread_appbg(thread_t thread, int setbg, __unused int importance)
+{
+	int enable;
+
+	if (setbg)
+		enable = TASK_POLICY_ENABLE;
+	else
+		enable = TASK_POLICY_DISABLE;
+
+	proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_PIDBIND_BG, enable);
+}
+
+/*
+ * Apply task's current effective TASK_POLICY_WATCHERS_BG state to every
+ * thread on its watcher list.
+ *
+ * The thread list is snapshotted (with a reference on each thread) while
+ * holding the taskwatch mutex and the policy is applied after dropping it.
+ * task->watchapplying serializes concurrent appliers: a caller that finds it
+ * set sleeps on it and then retries from scratch.
+ *
+ * Returns silently if there are no watchers or the snapshot array cannot be
+ * allocated.
+ */
+static void
+apply_appstate_watchers(task_t task)
+{
+	int numwatchers = 0, i, j, setbg;
+	thread_watchlist_t * threadlist;
+	task_watch_t * twp;
+
+retry:
+	/* if no watchers on the list return */
+	if ((numwatchers = task->num_taskwatchers) == 0)
+		return;
+
+	threadlist = (thread_watchlist_t *)kalloc(numwatchers*sizeof(thread_watchlist_t));
+	if (threadlist == NULL)
+		return;
+
+	bzero(threadlist, numwatchers*sizeof(thread_watchlist_t));
+
+	task_watch_lock();
+	/* serialize application of app state changes */
+
+	if (task->watchapplying != 0) {
+		lck_mtx_sleep(&task_watch_mtx, LCK_SLEEP_DEFAULT, &task->watchapplying, THREAD_UNINT);
+		task_watch_unlock();
+		kfree(threadlist, numwatchers*sizeof(thread_watchlist_t));
+		goto retry;
+	}
+
+	/* the count was sampled unlocked; start over if it changed meanwhile */
+	if (numwatchers != task->num_taskwatchers) {
+		task_watch_unlock();
+		kfree(threadlist, numwatchers*sizeof(thread_watchlist_t));
+		goto retry;
+	}
+
+	setbg = proc_get_effective_task_policy(task, TASK_POLICY_WATCHERS_BG);
+
+	task->watchapplying = 1;
+	i = 0;
+	queue_iterate(&task->task_watchers, twp, task_watch_t *, tw_links) {
+
+		threadlist[i].thread = twp->tw_thread;
+		thread_reference(threadlist[i].thread);
+		if (setbg != 0) {
+			twp->tw_importance = twp->tw_thread->importance;
+			threadlist[i].importance = INT_MIN;
+		} else
+			threadlist[i].importance = twp->tw_importance;
+		i++;
+		/*
+		 * threadlist has exactly numwatchers slots, so stop as soon as
+		 * they are all filled; continuing would write past the array.
+		 */
+		if (i >= numwatchers)
+			break;
+	}
+
+	task_watch_unlock();
+
+	for (j = 0; j< i; j++) {
+		set_thread_appbg(threadlist[j].thread, setbg, threadlist[j].importance);
+		thread_deallocate(threadlist[j].thread);
+	}
+	kfree(threadlist, numwatchers*sizeof(thread_watchlist_t));
+
+
+	/* done applying: clear the flag and wake one waiter, if any */
+	task_watch_lock();
+	task->watchapplying = 0;
+	thread_wakeup_one(&task->watchapplying);
+	task_watch_unlock();
+}
+
+/*
+ * Detach thread from the task it is watching, if any: unlink its watch
+ * record under the taskwatch mutex, then drop the thread and task references
+ * held by the record, free it, and disable the thread's pidbind background
+ * state.
+ */
+void
+thead_remove_taskwatch(thread_t thread)
+{
+	task_watch_t * watch;
+	int saved_importance = 0;
+
+	task_watch_lock();
+	watch = thread->taskwatch;
+	if (watch == NULL) {
+		/* not bound to anything */
+		task_watch_unlock();
+		return;
+	}
+	thread->taskwatch = NULL;
+	remove_taskwatch_locked(watch->tw_task, watch);
+	task_watch_unlock();
+
+	thread_deallocate(watch->tw_thread);
+	task_deallocate(watch->tw_task);
+	/* read what is still needed from the record before freeing it */
+	saved_importance = watch->tw_importance;
+	kfree(watch, sizeof(task_watch_t));
+	/* remove the thread and networkbg */
+	set_thread_appbg(thread, 0, saved_importance);
+}
+
+/*
+ * Tear down every watch record attached to task.
+ *
+ * Records are dequeued into a temporary array under the taskwatch mutex
+ * (clearing each watching thread's taskwatch pointer), and then, with the
+ * mutex dropped, each thread has its pidbind background state disabled and
+ * the thread/task references held by the record are released before the
+ * record is freed.
+ *
+ * Returns silently if there are no watchers or the temporary array cannot be
+ * allocated.
+ */
+void
+task_removewatchers(task_t task)
+{
+	int numwatchers = 0, i, j;
+	task_watch_t ** twplist = NULL;
+	task_watch_t * twp = NULL;
+
+retry:
+	if ((numwatchers = task->num_taskwatchers) == 0)
+		return;
+
+	twplist = (task_watch_t **)kalloc(numwatchers*sizeof(task_watch_t *));
+	if (twplist == NULL)
+		return;
+
+	bzero(twplist, numwatchers*sizeof(task_watch_t *));
+
+	task_watch_lock();
+	if (task->num_taskwatchers == 0) {
+		task_watch_unlock();
+		goto out;
+	}
+
+	/* the count was sampled unlocked; start over if it changed meanwhile */
+	if (numwatchers != task->num_taskwatchers) {
+		task_watch_unlock();
+		kfree(twplist, numwatchers*sizeof(task_watch_t *));
+		numwatchers = 0;
+		goto retry;
+	}
+
+	i = 0;
+	while((twp = (task_watch_t *)dequeue_head(&task->task_watchers)) != NULL)
+	{
+		twplist[i] = twp;
+		task->num_taskwatchers--;
+
+		/*
+		 * Since the linkage is removed and thread state cleanup is already set up,
+		 * remove the reference from the thread.
+		 */
+		twp->tw_thread->taskwatch = NULL;	/* removed linkage, clear thread holding ref */
+		i++;
+		/*
+		 * twplist has exactly numwatchers slots, so stop as soon as
+		 * they are all filled; continuing would write past the array.
+		 */
+		if ((task->num_taskwatchers == 0) || (i >= numwatchers))
+			break;
+	}
+
+	task_watch_unlock();
+
+	for (j = 0; j< i; j++) {
+
+		twp = twplist[j];
+		/* remove thread and network bg */
+		set_thread_appbg(twp->tw_thread, 0, twp->tw_importance);
+		thread_deallocate(twp->tw_thread);
+		task_deallocate(twp->tw_task);
+		kfree(twp, sizeof(task_watch_t));
+	}
+
+out:
+	kfree(twplist, numwatchers*sizeof(task_watch_t *));
+
+}
+#endif /* CONFIG_EMBEDDED */
+
+/*
+ * Routines for importance donation/inheritance/boosting
+ */
+
+/*
+ * Ask the IPC importance layer to update the live-donor state of
+ * target_task. Does nothing when IMPORTANCE_INHERITANCE is not configured or
+ * when no importance structure is returned for the task.
+ */
+static void
+task_importance_update_live_donor(task_t target_task)
+{
+#if IMPORTANCE_INHERITANCE
+	ipc_importance_task_t imp = ipc_importance_for_task(target_task, FALSE);
+
+	if (imp == IIT_NULL)
+		return;
+
+	ipc_importance_task_update_live_donor(imp);
+	ipc_importance_task_release(imp);
+#endif /* IMPORTANCE_INHERITANCE */
+}
+
+/*
+ * Set or clear (per donating) the donor mark on task's importance structure.
+ * Does nothing when IMPORTANCE_INHERITANCE is not configured or when no
+ * importance structure is returned for the task.
+ */
+void
+task_importance_mark_donor(task_t task, boolean_t donating)
+{
+#if IMPORTANCE_INHERITANCE
+	ipc_importance_task_t imp = ipc_importance_for_task(task, FALSE);
+
+	if (imp == IIT_NULL)
+		return;
+
+	ipc_importance_task_mark_donor(imp, donating);
+	ipc_importance_task_release(imp);
+#endif /* IMPORTANCE_INHERITANCE */
+}
+
+/*
+ * Set or clear (per live_donating) the live-donor mark on task's importance
+ * structure. Does nothing when IMPORTANCE_INHERITANCE is not configured or
+ * when no importance structure is returned for the task.
+ */
+void
+task_importance_mark_live_donor(task_t task, boolean_t live_donating)
+{
+#if IMPORTANCE_INHERITANCE
+	ipc_importance_task_t imp = ipc_importance_for_task(task, FALSE);
+
+	if (imp == IIT_NULL)
+		return;
+
+	ipc_importance_task_mark_live_donor(imp, live_donating);
+	ipc_importance_task_release(imp);
+#endif /* IMPORTANCE_INHERITANCE */
+}
+
+/*
+ * Set or clear (per receiving) the receiver mark on task's importance
+ * structure. Does nothing when IMPORTANCE_INHERITANCE is not configured or
+ * when no importance structure is returned for the task.
+ */
+void
+task_importance_mark_receiver(task_t task, boolean_t receiving)
+{
+#if IMPORTANCE_INHERITANCE
+	ipc_importance_task_t imp = ipc_importance_for_task(task, FALSE);
+
+	if (imp == IIT_NULL)
+		return;
+
+	ipc_importance_task_mark_receiver(imp, receiving);
+	ipc_importance_task_release(imp);
+#endif /* IMPORTANCE_INHERITANCE */
+}
+
+/*
+ * Set or clear (per denap) the de-nap receiver mark on task's importance
+ * structure. Does nothing when IMPORTANCE_INHERITANCE is not configured or
+ * when no importance structure is returned for the task.
+ */
+void
+task_importance_mark_denap_receiver(task_t task, boolean_t denap)
+{
+#if IMPORTANCE_INHERITANCE
+	ipc_importance_task_t imp = ipc_importance_for_task(task, FALSE);
+
+	if (imp == IIT_NULL)
+		return;
+
+	ipc_importance_task_mark_denap_receiver(imp, denap);
+	ipc_importance_task_release(imp);
+#endif /* IMPORTANCE_INHERITANCE */
+}
+
+/*
+ * Reset the importance state hanging off task->task_imp_base and then
+ * refresh the task's live-donor status. Compiles to an empty function when
+ * IMPORTANCE_INHERITANCE is not configured.
+ */
+void
+task_importance_reset(__imp_only task_t task)
+{
+#if IMPORTANCE_INHERITANCE
+	/* TODO: Lower importance downstream before disconnect */
+	ipc_importance_reset(task->task_imp_base, FALSE);
+	task_importance_update_live_donor(task);
+#endif /* IMPORTANCE_INHERITANCE */
+}
+
+#if IMPORTANCE_INHERITANCE
+
+/*
+ * Record boost_active in the task's requested policy (trp_boosted) without
+ * running the policy update function. When IMPORTANCE_TRACE is configured,
+ * the change is bracketed by kdebug start/end events and a DTrace
+ * boost/unboost probe.
+ *
+ * Task lock must be held.
+ */
+static void
+task_set_boost_locked(task_t task, boolean_t boost_active)
+{
+#if IMPORTANCE_TRACE
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
+	    proc_selfpid(), task_pid(task), trequested_0(task), trequested_1(task), 0);
+#endif /* IMPORTANCE_TRACE */
+
+	task->requested_policy.trp_boosted = boost_active;
+
+#if IMPORTANCE_TRACE
+	if (boost_active != TRUE) {
+		DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
+	} else {
+		DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
+	}
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
+	    proc_selfpid(), task_pid(task),
+	    trequested_0(task), trequested_1(task), 0);
+#endif /* IMPORTANCE_TRACE */
+}
+
+/*
+ * Store boost_active as the task's boost bit and then run the locked policy
+ * update, accumulating pending work in pend_token.
+ *
+ * Task lock must be held. The caller must call update complete after
+ * unlocking the task.
+ */
+void
+task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
+{
+	/* step 1: record the new boost state */
+	task_set_boost_locked(task, boost_active);
+
+	/* step 2: recompute policy with the new state */
+	task_policy_update_locked(task, pend_token);
+}
+
+/*
+ * Report whether this task should donate importance. A task without an
+ * importance structure is never a donor.
+ *
+ * May be called without taking the task lock. In that case, donor status can
+ * change, so check only once for each donation event.
+ */
+boolean_t
+task_is_importance_donor(task_t task)
+{
+	if (task->task_imp_base != IIT_NULL)
+		return ipc_importance_task_is_donor(task->task_imp_base);
+
+	return FALSE;
+}
+
+/*
+ * Report the task's donor mark; FALSE when the task has no importance
+ * structure.
+ */
+boolean_t
+task_is_marked_importance_donor(task_t task)
+{
+	if (task->task_imp_base != IIT_NULL)
+		return ipc_importance_task_is_marked_donor(task->task_imp_base);
+
+	return FALSE;
+}
+
+/*
+ * Report the task's live-donor mark; FALSE when the task has no importance
+ * structure.
+ */
+boolean_t
+task_is_marked_live_importance_donor(task_t task)
+{
+	if (task->task_imp_base != IIT_NULL)
+		return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
+
+	return FALSE;
+}
+
+
+/*
+ * Report whether the task is an importance receiver, as given by the
+ * receiver mark on its importance structure; FALSE when it has none.
+ *
+ * This routine may be called without holding the task lock
+ * since the value of imp_receiver can never be unset.
+ */
+boolean_t
+task_is_importance_receiver(task_t task)
+{
+	if (task->task_imp_base != IIT_NULL)
+		return ipc_importance_task_is_marked_receiver(task->task_imp_base);
+
+	return FALSE;
+}
+
+/*
+ * Report the task's receiver mark; FALSE when the task has no importance
+ * structure.
+ */
+boolean_t
+task_is_marked_importance_receiver(task_t task)
+{
+	if (task->task_imp_base != IIT_NULL)
+		return ipc_importance_task_is_marked_receiver(task->task_imp_base);
+
+	return FALSE;
+}
+
+/*
+ * Report whether the task is a de-nap receiver; FALSE when the task has no
+ * importance structure.
+ *
+ * This routine may be called without holding the task lock
+ * since the value of de-nap receiver can never be unset.
+ */
+boolean_t
+task_is_importance_denap_receiver(task_t task)
+{
+	if (task->task_imp_base != IIT_NULL)
+		return ipc_importance_task_is_denap_receiver(task->task_imp_base);
+
+	return FALSE;
+}
+
+/*
+ * Report the task's de-nap receiver mark; FALSE when the task has no
+ * importance structure.
+ */
+boolean_t
+task_is_marked_importance_denap_receiver(task_t task)
+{
+	if (task->task_imp_base != IIT_NULL)
+		return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
+
+	return FALSE;
+}
+
+/*
+ * Report whether the task is any kind of receiver: an importance receiver
+ * or a de-nap receiver. FALSE when the task has no importance structure.
+ *
+ * This routine may be called without holding the task lock
+ * since the value of imp_receiver can never be unset.
+ */
+boolean_t
+task_is_importance_receiver_type(task_t task)
+{
+	if (task->task_imp_base == IIT_NULL)
+		return FALSE;
+
+	/* the de-nap check is only reached when the task is not a plain receiver */
+	if (task_is_importance_receiver(task))
+		return TRUE;
+
+	return task_is_importance_denap_receiver(task) ? TRUE : FALSE;
+}
+
+/*
+ * External importance assertions are managed by the process in userspace
+ * Internal importance assertions are the responsibility of the kernel
+ * Assertions are changed from internal to external via task_importance_externalize_assertion
+ */
+
+/*
+ * Take count internal importance assertions on target_task.
+ *
+ * Returns 0 on success, EOVERFLOW if no importance structure is available
+ * for the task, and ENOTSUP if the IPC importance layer reports failure.
+ */
+int
+task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
+{
+	kern_return_t kr;
+	/* may be first time, so allow for possible importance setup */
+	ipc_importance_task_t imp = ipc_importance_for_task(target_task, FALSE);
+
+	if (imp == IIT_NULL)
+		return EOVERFLOW;
+
+	kr = ipc_importance_task_hold_internal_assertion(imp, count);
+	ipc_importance_task_release(imp);
+
+	if (kr != KERN_SUCCESS)
+		return ENOTSUP;
+	return 0;
+}
+
+/*
+ * Take count file-lock importance assertions on target_task.
+ *
+ * Returns 0 on success, EOVERFLOW if no importance structure is available
+ * for the task, and ENOTSUP if the IPC importance layer reports failure.
+ */
+int
+task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
+{
+	kern_return_t kr;
+	/* may be first time, so allow for possible importance setup */
+	ipc_importance_task_t imp = ipc_importance_for_task(target_task, FALSE);
+
+	if (imp == IIT_NULL)
+		return EOVERFLOW;
+
+	kr = ipc_importance_task_hold_file_lock_assertion(imp, count);
+	ipc_importance_task_release(imp);
+
+	if (kr != KERN_SUCCESS)
+		return ENOTSUP;
+	return 0;
+}
+
+/*
+ * Take count legacy external importance assertions on target_task.
+ *
+ * Returns 0 on success, EOVERFLOW if the task has no importance structure
+ * yet, and ENOTSUP if the IPC importance layer reports failure.
+ */
+int
+task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
+{
+	kern_return_t kr;
+	/* must already have set up an importance */
+	ipc_importance_task_t imp = target_task->task_imp_base;
+
+	if (imp == IIT_NULL)
+		return EOVERFLOW;
+
+	kr = ipc_importance_task_hold_legacy_external_assertion(imp, count);
+	if (kr != KERN_SUCCESS)
+		return ENOTSUP;
+	return 0;
+}
+
+/*
+ * Drop count file-lock importance assertions from target_task.
+ *
+ * Returns 0 on success and EOVERFLOW if the task has no importance
+ * structure or the IPC importance layer reports failure.
+ */
+int
+task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
+{
+	kern_return_t kr;
+	/* must already have set up an importance */
+	ipc_importance_task_t imp = target_task->task_imp_base;
+
+	if (imp == IIT_NULL)
+		return EOVERFLOW;
+
+	kr = ipc_importance_task_drop_file_lock_assertion(imp, count);
+	if (kr != KERN_SUCCESS)
+		return EOVERFLOW;
+	return 0;
+}
+
+/*
+ * Drop count legacy external importance assertions from target_task.
+ *
+ * Returns 0 on success and EOVERFLOW if the task has no importance
+ * structure or the IPC importance layer reports failure.
+ */
+int
+task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
+{
+	kern_return_t kr;
+	/* must already have set up an importance */
+	ipc_importance_task_t imp = target_task->task_imp_base;
+
+	if (imp == IIT_NULL)
+		return EOVERFLOW;
+
+	kr = ipc_importance_task_drop_legacy_external_assertion(imp, count);
+	if (kr != KERN_SUCCESS)
+		return EOVERFLOW;
+	return 0;
+}
+
+/*
+ * Point a watch port's importance binding at task.
+ *
+ * task:   task that should become the port's importance task.
+ * port:   port to rebind; nothing is done to it unless IP_VALID(port).
+ * boostp: out parameter, always written. Receives the port's ip_impcount as
+ *         read under the port lock when the port is marked tempowner, and 0
+ *         otherwise.
+ *
+ * If the port was already bound to an importance task, that binding is
+ * cleared and, after the port lock is dropped, the previous task has the
+ * port's boost count dropped as internal assertions and its reference
+ * released.
+ */
+static void
+task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
+{
+ int boost = 0;
+
+ __imptrace_only int released_pid = 0;
+ __imptrace_only int pid = task_pid(task);
+
+ ipc_importance_task_t release_imp_task = IIT_NULL;
+
+ if (IP_VALID(port) != 0) {
+ /* obtained before taking the port lock; either handed to the port or released below */
+ ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
+
+ ip_lock(port);
+
+ /*
+ * The port must have been marked tempowner already.
+ * This also filters out ports whose receive rights
+ * are already enqueued in a message, as you can't
+ * change the right's destination once it's already
+ * on its way.
+ */
+ if (port->ip_tempowner != 0) {
+ assert(port->ip_impdonation != 0);
+
+ boost = port->ip_impcount;
+ if (IIT_NULL != port->ip_imp_task) {
+ /*
+ * if this port is already bound to a task,
+ * release the task reference and drop any
+ * watchport-forwarded boosts
+ */
+ release_imp_task = port->ip_imp_task;
+ port->ip_imp_task = IIT_NULL;
+ }
+
+ /* mark the port is watching another task (reference held in port->ip_imp_task) */
+ if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
+ port->ip_imp_task = new_imp_task;
+ /* reference now belongs to the port; skip the release below */
+ new_imp_task = IIT_NULL;
+ }
+ }
+ ip_unlock(port);
+
+ /* the new importance task was not installed on the port: release it */
+ if (IIT_NULL != new_imp_task) {
+ ipc_importance_task_release(new_imp_task);
+ }
+
+ /* previous binding, if any: drop its forwarded boosts, then release it */
+ if (IIT_NULL != release_imp_task) {
+ if (boost > 0)
+ ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
+
+ // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
+ ipc_importance_task_release(release_imp_task);
+ }
+#if IMPORTANCE_TRACE
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
+ proc_selfpid(), pid, boost, released_pid, 0);
+#endif /* IMPORTANCE_TRACE */
+ }
+
+ *boostp = boost;
+ return;
+}
+
+#endif /* IMPORTANCE_INHERITANCE */
+
+/*
+ * Routines for VM to query task importance
+ */
+
+
+/*
+ * Order to be considered while estimating importance
+ * for low memory notification and purging purgeable memory.
+ */
+/* weight added by task_importance_estimate() when the effective role is TASK_FOREGROUND_APPLICATION */
+#define TASK_IMPORTANCE_FOREGROUND 4
+/* weight added by task_importance_estimate() when the effective TASK_POLICY_DARWIN_BG policy is 0 */
+#define TASK_IMPORTANCE_NOTDARWINBG 1
+
+
+/*
+ * Query or (un)mark the task as a privileged listener for memory
+ * notifications. If marked, this task will be among the first to be notified
+ * amongst the bulk of all other tasks when the system enters a pressure
+ * level of interest to this task.
+ *
+ * When old_value is non-NULL the call is a pure query: the current mark is
+ * stored in *old_value and new_value is ignored. When old_value is NULL the
+ * mark is set to new_value under the task lock.
+ *
+ * Always returns 0.
+ */
+int
+task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
+{
+	if (old_value == NULL) {
+		/* update request */
+		task_lock(task);
+		task->low_mem_privileged_listener = (uint32_t)new_value;
+		task_unlock(task);
+		return 0;
+	}
+
+	/* query request; the field is read without taking the task lock */
+	*old_value = (boolean_t)task->low_mem_privileged_listener;
+	return 0;
+}
+
+/*
+ * Report whether the task has already been notified for pressurelevel.
+ *
+ * Returns FALSE for a NULL task, the warn/critical notified flag for
+ * kVMPressureWarning/kVMPressureCritical, and TRUE for any other level.
+ *
+ * Condition: task lock should be held while calling this function.
+ */
+boolean_t
+task_has_been_notified(task_t task, int pressurelevel)
+{
+	if (task == NULL)
+		return FALSE;
+
+	if (pressurelevel == kVMPressureWarning) {
+		return (task->low_mem_notified_warn ? TRUE : FALSE);
+	}
+
+	if (pressurelevel == kVMPressureCritical) {
+		return (task->low_mem_notified_critical ? TRUE : FALSE);
+	}
+
+	/* levels without a per-task flag are reported as already notified */
+	return TRUE;
+}
+
+
+/*
+ * Report whether the task has already been used for purging at
+ * pressurelevel.
+ *
+ * Returns FALSE for a NULL task, the warn/critical purged flag for
+ * kVMPressureWarning/kVMPressureCritical, and TRUE for any other level.
+ *
+ * Condition: task lock should be held while calling this function.
+ */
+boolean_t
+task_used_for_purging(task_t task, int pressurelevel)
+{
+	if (task == NULL)
+		return FALSE;
+
+	if (pressurelevel == kVMPressureWarning) {
+		return (task->purged_memory_warn ? TRUE : FALSE);
+	}
+
+	if (pressurelevel == kVMPressureCritical) {
+		return (task->purged_memory_critical ? TRUE : FALSE);
+	}
+
+	/* levels without a per-task flag are reported as already used */
+	return TRUE;
+}
+
+
+/*
+ * Mark the task as notified with a memory notification for pressurelevel.
+ * A NULL task, or a level other than kVMPressureWarning or
+ * kVMPressureCritical, is ignored.
+ *
+ * Condition: task lock should be held while calling this function.
+ */
+void
+task_mark_has_been_notified(task_t task, int pressurelevel)
+{
+	if (task == NULL)
+		return;
+
+	if (pressurelevel == kVMPressureWarning) {
+		task->low_mem_notified_warn = 1;
+		return;
+	}
+
+	if (pressurelevel == kVMPressureCritical) {
+		task->low_mem_notified_critical = 1;
+	}
+}
+
+
+/*
+ * Mark the task as purged for pressurelevel. A NULL task, or a level other
+ * than kVMPressureWarning or kVMPressureCritical, is ignored.
+ *
+ * Condition: task lock should be held while calling this function.
+ */
+void
+task_mark_used_for_purging(task_t task, int pressurelevel)
+{
+	if (task == NULL)
+		return;
+
+	if (pressurelevel == kVMPressureWarning) {
+		task->purged_memory_warn = 1;
+		return;
+	}
+
+	if (pressurelevel == kVMPressureCritical) {
+		task->purged_memory_critical = 1;
+	}
+}
+
+
+/*
+ * Make the task eligible for low memory notification again at
+ * pressurelevel by clearing its notified flag. A NULL task, or a level other
+ * than kVMPressureWarning or kVMPressureCritical, is ignored.
+ *
+ * Condition: task lock should be held while calling this function.
+ */
+void
+task_clear_has_been_notified(task_t task, int pressurelevel)
+{
+	if (task == NULL)
+		return;
+
+	if (pressurelevel == kVMPressureWarning) {
+		task->low_mem_notified_warn = 0;
+		return;
+	}
+
+	if (pressurelevel == kVMPressureCritical) {
+		task->low_mem_notified_critical = 0;
+	}
+}
+
+
+/*
+ * Make the task eligible for purging its purgeable memory again by clearing
+ * both the warning-level and critical-level purged flags. A NULL task is
+ * ignored.
+ *
+ * Condition: task lock should be held while calling this function.
+ */
+void
+task_clear_used_for_purging(task_t task)
+{
+	if (task != NULL) {
+		task->purged_memory_warn = 0;
+		task->purged_memory_critical = 0;
+	}
+}
+
+
+/*
+ * Estimate task importance for purging its purgeable memory
+ * and low memory notification.
+ *
+ * The estimate is the sum of two weights:
+ *	- TASK_IMPORTANCE_FOREGROUND if the effective role is
+ *	  TASK_FOREGROUND_APPLICATION
+ *	- TASK_IMPORTANCE_NOTDARWINBG if the task is not Darwin BG
+ *
+ * Returns: Estimated task importance. Less important task will have lower
+ *	    estimated importance. A NULL task yields 0.
+ */
+int
+task_importance_estimate(task_t task)
+{
+	int estimate = 0;
+
+	if (task == NULL)
+		return 0;
+
+	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) {
+		estimate += TASK_IMPORTANCE_FOREGROUND;
+	}
+
+	if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0) {
+		estimate += TASK_IMPORTANCE_NOTDARWINBG;
+	}
+
+	return estimate;
+}
+
+/*
+ * Report whether the task currently holds any importance assertions
+ * (non-zero iit_assertcnt on its importance structure).
+ *
+ * A task with no importance structure holds no assertions, so FALSE is
+ * returned instead of dereferencing a null task_imp_base.
+ */
+boolean_t
+task_has_assertions(task_t task)
+{
+	if (task->task_imp_base == IIT_NULL)
+		return FALSE;
+
+	return (task->task_imp_base->iit_assertcnt? TRUE : FALSE);
+}
+
+
+/*
+ * Report a resource violation by task `violator` to the host's
+ * HOST_RESOURCE_NOTIFY_PORT using the supplied send routine.
+ *
+ * sendfunc: routine with the same type as send_cpu_usage_violation; called
+ *           with the destination port, process name, pid, path, timestamp,
+ *           the four ledger values from linfo, and flags.
+ * violator: offending task; must be non-NULL and have BSD process info.
+ * linfo:    ledger entry info supplying balance, last refill, limit and
+ *           refill period.
+ * flags:    notification flags; when kRNFatalLimitFlag is set the process
+ *           path is looked up, otherwise an empty path is sent.
+ *
+ * Returns KERN_NOT_SUPPORTED when built without MACH_BSD,
+ * KERN_INVALID_ARGUMENT when violator is NULL or has no BSD process,
+ * otherwise the first failing step's error or the result of sendfunc.
+ */
+kern_return_t
+send_resource_violation(typeof(send_cpu_usage_violation) sendfunc,
+ task_t violator,
+ struct ledger_entry_info *linfo,
+ resource_notify_flags_t flags)
+{
+#ifndef MACH_BSD
+ return KERN_NOT_SUPPORTED;
+#else
+ kern_return_t kr = KERN_SUCCESS;
+ proc_t proc = NULL;
+ posix_path_t proc_path = "";
+ proc_name_t procname = "<unknown>";
+ int pid = -1;
+ clock_sec_t secs;
+ clock_nsec_t nsecs;
+ mach_timespec_t timestamp;
+ thread_t curthread = current_thread();
+ ipc_port_t dstport = MACH_PORT_NULL;
+
+ if (!violator) {
+ kr = KERN_INVALID_ARGUMENT; goto finish;
+ }
+
+ /* extract violator information */
+ task_lock(violator);
+ if (!(proc = get_bsdtask_info(violator))) {
+ task_unlock(violator);
+ kr = KERN_INVALID_ARGUMENT; goto finish;
+ }
+ (void)mig_strncpy(procname, proc_best_name(proc), sizeof(procname));
+ pid = task_pid(violator);
+ /* the path is only looked up for fatal limits; kr stays KERN_SUCCESS otherwise */
+ if (flags & kRNFatalLimitFlag) {
+ kr = proc_pidpathinfo_internal(proc, 0, proc_path,
+ sizeof(proc_path), NULL);
+ }
+ task_unlock(violator);
+ /* checked only after the task lock has been dropped */
+ if (kr) goto finish;
+
+ /* violation time ~ now */
+ clock_get_calendar_nanotime(&secs, &nsecs);
+ /* seconds and nanoseconds are narrowed to 32 bits for mach_timespec_t */
+ timestamp.tv_sec = (int32_t)secs;
+ timestamp.tv_nsec = (int32_t)nsecs;
+ /* 25567702 tracks widening mach_timespec_t */
+
+ /* send message */
+ kr = host_get_special_port(host_priv_self(), HOST_LOCAL_NODE,
+ HOST_RESOURCE_NOTIFY_PORT, &dstport);
+ if (kr) goto finish;
+
+ /* TH_OPT_HONOR_QLIMIT causes ipc_kmsg_send() to respect the
+ * queue limit. It also unsets this flag, but this code also
+ * unsets it for clarity and in case that code changes. */
+ curthread->options |= TH_OPT_HONOR_QLIMIT;
+ kr = sendfunc(dstport,
+ procname, pid, proc_path, timestamp,
+ linfo->lei_balance, linfo->lei_last_refill,
+ linfo->lei_limit, linfo->lei_refill_period,
+ flags);
+ curthread->options &= (~TH_OPT_HONOR_QLIMIT);
+
+ /* drop the send right obtained from host_get_special_port() */
+ ipc_port_release_send(dstport);
+
+finish:
+ return kr;
+#endif /* MACH_BSD */
+}
+
+
+/*
+ * Resource violations trace four 64-bit integers. For K32, two additional
+ * codes are allocated, the first with the low nibble doubled. So if the K64
+ * code is 0x042, the K32 codes would be 0x044 and 0x045.
+ */
+#ifdef __LP64__
+/*
+ * K64: emit one DBG_MACH_RESOURCE trace event with the given code carrying
+ * the ledger balance, last refill, limit and refill period.
+ */
+void
+trace_resource_violation(uint16_t code,
+    struct ledger_entry_info *linfo)
+{
+	KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, code),
+	    linfo->lei_balance, linfo->lei_last_refill,
+	    linfo->lei_limit, linfo->lei_refill_period);
+}
+#else /* K32 */
+/* TODO: create/find a trace_two_LLs() for K32 systems */
+#define MASK32 0xffffffff
+/*
+ * K32: the four 64-bit ledger values are split into 32-bit halves and
+ * emitted as two consecutive DBG_MACH_RESOURCE trace events. The first
+ * event code is derived from `code` by doubling its low two bits; the
+ * second is that code plus one.
+ */
+void
+trace_resource_violation(uint16_t code,
+    struct ledger_entry_info *linfo)
+{
+	/* event codes */
+	int8_t doubled_bits = (code & 0x3) * 2;
+	int16_t first_code = (code & 0xffc) | doubled_bits;
+	int16_t second_code = first_code + 1;
+
+	/* first event payload: balance and last refill */
+	int32_t bal_hi = (linfo->lei_balance >> 32) & MASK32;
+	int32_t bal_lo = linfo->lei_balance & MASK32;
+	int32_t refill_hi = (linfo->lei_last_refill >> 32) & MASK32;
+	int32_t refill_lo = linfo->lei_last_refill & MASK32;
+
+	/* second event payload: limit and refill period */
+	int32_t lim_hi = (linfo->lei_limit >> 32) & MASK32;
+	int32_t lim_lo = linfo->lei_limit & MASK32;
+	int32_t period_hi = (linfo->lei_refill_period >> 32) & MASK32;
+	int32_t period_lo = linfo->lei_refill_period & MASK32;
+
+	KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, first_code),
+	    bal_hi, bal_lo,
+	    refill_hi, refill_lo);
+	KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, second_code),
+	    lim_hi, lim_lo,
+	    period_hi, period_lo);
+}
+#endif /* K64/K32 */