+
+ if (!(*get_default)) {
+ int relprio_value = 0;
+ info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+ TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
+
+ info->tier_importance = -relprio_value;
+ } else {
+ info->qos_tier = THREAD_QOS_UNSPECIFIED;
+ info->tier_importance = 0;
+ }
+
+ break;
+ }
+
+ default:
+ result = KERN_INVALID_ARGUMENT;
+ break;
+ }
+
+ thread_mtx_unlock(thread);
+
+ return result;
+}
+
+/*
+ * Run a full effective-policy update on `thread`, forcing a priority
+ * recompute, bracketed by IMP_UPDATE_TASK_CREATE start/end trace points.
+ *
+ * The update routine requires a pend token; the one used here is a
+ * scratch object whose contents are never consumed.
+ */
+void
+thread_policy_create(thread_t thread)
+{
+	/* Scratch token: filled in by the update below and then dropped */
+	struct task_pend_token discarded_token = {};
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
+	    thread_tid(thread),
+	    theffective_0(thread),
+	    theffective_1(thread),
+	    thread->base_pri,
+	    0);
+
+	thread_policy_update_internal_spinlocked(thread, TRUE, &discarded_token);
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
+	    thread_tid(thread),
+	    theffective_0(thread),
+	    theffective_1(thread),
+	    thread->base_pri,
+	    0);
+}
+
+/*
+ * Traced wrapper around thread_policy_update_internal_spinlocked().
+ *
+ * Emits an IMP_UPDATE start trace point, forwards all arguments to the
+ * internal update routine unchanged, then emits the matching end trace
+ * point carrying the post-update effective policy words and base priority.
+ */
+static void
+thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
+{
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_START,
+	    thread_tid(thread),
+	    theffective_0(thread),
+	    theffective_1(thread),
+	    thread->base_pri,
+	    0);
+
+	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
+	    thread_tid(thread),
+	    theffective_0(thread),
+	    theffective_1(thread),
+	    thread->base_pri,
+	    0);
+}
+
+
+
+/*
+ * One thread state update function TO RULE THEM ALL
+ *
+ * This function recomputes the thread effective policy fields from the
+ * thread's requested policy and its task's effective policy, publishes
+ * the result in thread->effective_policy, and pushes the results to the
+ * relevant subsystems.
+ *
+ * Parameters:
+ *   thread              thread whose effective policy is recomputed
+ *   recompute_priority  when set, thread_recompute_priority() is called
+ *                       even if no priority-relevant field changed
+ *   pend_token          in/out. tpt_force_recompute_pri is read.
+ *                       tpt_update_sockets, tpt_update_throttle and
+ *                       tpt_update_thread_sfi are set when the matching
+ *                       effective fields change. tpt_update_turnstile is
+ *                       cleared when base_pri did not change or the
+ *                       thread has no waiting turnstile.
+ *
+ * Returns nothing; follow-up actions are reported through pend_token.
+ *
+ * Called with thread spinlock locked, task may be locked, thread mutex may be locked
+ */
+static void
+thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
+ task_pend_token_t pend_token)
+{
+ /*
+ * Step 1:
+ * Gather requested policy and effective task state
+ * (both are copied by value; the copies are not written back)
+ */
+
+ struct thread_requested_policy requested = thread->requested_policy;
+ struct task_effective_policy task_effective = thread->task->effective_policy;
+
+ /*
+ * Step 2:
+ * Calculate new effective policies from requested policy, task and thread state
+ * Rules:
+ * Don't change requested, it won't take effect
+ */
+
+ struct thread_effective_policy next = {};
+
+ next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
+
+ uint32_t next_qos = requested.thrp_qos;
+
+ /* Overrides and promotions only raise the QoS of threads that requested one */
+ if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
+ next_qos = MAX(requested.thrp_qos_override, next_qos);
+ next_qos = MAX(requested.thrp_qos_promote, next_qos);
+ next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
+ next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
+ next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
+ }
+
+ next.thep_qos = next_qos;
+
+ /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
+ if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
+ if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
+ next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
+ } else {
+ next.thep_qos = task_effective.tep_qos_clamp;
+ }
+ }
+
+ /*
+ * Extract outbound-promotion QoS before applying task ceiling or BG clamp
+ * This allows QoS promotions to work properly even after the process is unclamped.
+ */
+ next.thep_qos_promote = next.thep_qos;
+
+ /* The ceiling only applies to threads that are in the QoS world */
+ if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
+ next.thep_qos != THREAD_QOS_UNSPECIFIED) {
+ next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
+ }
+
+ /* No sync IPC QoS override is applied here; assert that none has been requested */
+ assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
+
+ /*
+ * The QoS relative priority is only applicable when the original programmer's
+ * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
+ * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
+ * since otherwise it would be lower than unclamped threads. Similarly, in the
+ * presence of boosting, the programmer doesn't know what other actors
+ * are boosting the thread.
+ */
+ if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
+ (requested.thrp_qos == next.thep_qos) &&
+ (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
+ next.thep_qos_relprio = requested.thrp_qos_relprio;
+ } else {
+ next.thep_qos_relprio = 0;
+ }
+
+ /* Calculate DARWIN_BG */
+ boolean_t wants_darwinbg = FALSE;
+ boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
+
+ /*
+ * If DARWIN_BG has been requested at either level, it's engaged.
+ * darwinbg threads always create bg sockets,
+ * but only some types of darwinbg change the sockets
+ * after they're created
+ */
+ if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
+ wants_all_sockets_bg = wants_darwinbg = TRUE;
+ }
+
+ if (requested.thrp_pidbind_bg) {
+ wants_all_sockets_bg = wants_darwinbg = TRUE;
+ }
+
+ if (task_effective.tep_darwinbg) {
+ wants_darwinbg = TRUE; /* task-level darwinbg does not set wants_all_sockets_bg */
+ }
+
+ if (next.thep_qos == THREAD_QOS_BACKGROUND ||
+ next.thep_qos == THREAD_QOS_MAINTENANCE) {
+ wants_darwinbg = TRUE;
+ }
+
+ /* Calculate side effects of DARWIN_BG */
+
+ if (wants_darwinbg) {
+ next.thep_darwinbg = 1;
+ }
+
+ if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
+ next.thep_new_sockets_bg = 1;
+ }
+
+ /* Don't use task_effective.tep_all_sockets_bg here */
+ if (wants_all_sockets_bg) {
+ next.thep_all_sockets_bg = 1;
+ }
+
+ /* darwinbg implies background QOS (or lower) */
+ if (next.thep_darwinbg &&
+ (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
+ next.thep_qos = THREAD_QOS_BACKGROUND;
+ next.thep_qos_relprio = 0;
+ }
+
+ /* Calculate IO policy: the numerically largest tier among all contributors wins */
+
+ int iopol = THROTTLE_LEVEL_TIER0;
+
+ /* Factor in the task's IO policy */
+ if (next.thep_darwinbg) {
+ iopol = MAX(iopol, task_effective.tep_bg_iotier);
+ }
+
+ iopol = MAX(iopol, task_effective.tep_io_tier);
+
+ /* Look up the associated IO tier value for the QoS class */
+ iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
+
+ iopol = MAX(iopol, requested.thrp_int_iotier);
+ iopol = MAX(iopol, requested.thrp_ext_iotier);
+
+ next.thep_io_tier = iopol;
+
+ /*
+ * If a QoS override is causing IO to go into a lower tier, we also set
+ * the passive bit so that a thread doesn't end up stuck in its own throttle
+ * window when the override goes away.
+ */
+ boolean_t qos_io_override_active = FALSE;
+ if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
+ thread_qos_policy_params.qos_iotier[requested.thrp_qos]) {
+ qos_io_override_active = TRUE;
+ }
+
+ /* Calculate Passive IO policy */
+ if (requested.thrp_ext_iopassive ||
+ requested.thrp_int_iopassive ||
+ qos_io_override_active ||
+ task_effective.tep_io_passive) {
+ next.thep_io_passive = 1;
+ }
+
+ /* Calculate timer QOS: largest of the thread request, the task value and the QoS class value */
+ uint32_t latency_qos = requested.thrp_latency_qos;
+
+ latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
+ latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
+
+ next.thep_latency_qos = latency_qos;
+
+ /* Calculate throughput QOS: largest of the thread request, the task value and the QoS class value */
+ uint32_t through_qos = requested.thrp_through_qos;
+
+ through_qos = MAX(through_qos, task_effective.tep_through_qos);
+ through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
+
+ next.thep_through_qos = through_qos;
+
+ if (task_effective.tep_terminated || requested.thrp_terminated) {
+ /* Shoot down the throttles that slow down exit or response to SIGTERM */
+ next.thep_terminated = 1;
+ next.thep_darwinbg = 0;
+ next.thep_io_tier = THROTTLE_LEVEL_TIER0;
+ next.thep_qos = THREAD_QOS_UNSPECIFIED;
+ next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
+ next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
+ }
+
+ /*
+ * Step 3:
+ * Swap out old policy for new policy
+ */
+
+ struct thread_effective_policy prev = thread->effective_policy;
+
+ thread_update_qos_cpu_time_locked(thread); /* called while the old effective policy is still installed */
+
+ /* This is the point where the new values become visible to other threads */
+ thread->effective_policy = next;
+
+ /*
+ * Step 4:
+ * Pend updates that can't be done while holding the thread lock
+ */
+
+ if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
+ pend_token->tpt_update_sockets = 1;
+ }
+
+ /* TODO: Doesn't this only need to be done if the throttle went up? */
+ if (prev.thep_io_tier != next.thep_io_tier) {
+ pend_token->tpt_update_throttle = 1;
+ }
+
+ /*
+ * Check for the attributes that sfi_thread_classify() consults,
+ * and trigger SFI re-evaluation.
+ */
+ if (prev.thep_qos != next.thep_qos ||
+ prev.thep_darwinbg != next.thep_darwinbg) {
+ pend_token->tpt_update_thread_sfi = 1;
+ }
+
+ integer_t old_base_pri = thread->base_pri; /* snapshot taken before any recompute; compared below */
+
+ /*
+ * Step 5:
+ * Update other subsystems as necessary if something has changed
+ */
+
+ /* Check for the attributes that thread_recompute_priority() consults */
+ if (prev.thep_qos != next.thep_qos ||
+ prev.thep_qos_relprio != next.thep_qos_relprio ||
+ prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
+ prev.thep_terminated != next.thep_terminated ||
+ pend_token->tpt_force_recompute_pri == 1 ||
+ recompute_priority) {
+ thread_recompute_priority(thread);
+ }
+
+ /*
+ * Check if the thread is waiting on a turnstile and needs priority propagation.
+ */
+ if (pend_token->tpt_update_turnstile &&
+ ((old_base_pri == thread->base_pri) ||
+ !thread_get_waiting_turnstile(thread))) {
+ /*
+ * Reset update turnstile pend token since either
+ * the thread priority did not change or thread is
+ * not blocked on a turnstile.
+ */
+ pend_token->tpt_update_turnstile = 0;
+ }
+}
+
+
+/*
+ * Start a thread policy state transition on the thread in `task`
+ * identified by `tid`.
+ *
+ * Use this variant when the caller cannot guarantee that the thread
+ * stays alive: the thread is looked up by TID, and nothing happens if
+ * the lookup fails.
+ *
+ * Precondition: No locks are held.
+ * The lookup takes the task lock, so callers that already hold a thread
+ * reference are better served by proc_set_thread_policy().
+ */
+void
+proc_set_thread_policy_with_tid(task_t task,
+    uint64_t tid,
+    int category,
+    int flavor,
+    int value)
+{
+	/* task_findtid() takes the task lock and yields a referenced thread, or THREAD_NULL */
+	thread_t target = task_findtid(task, tid);
+
+	if (target != THREAD_NULL) {
+		proc_set_thread_policy(target, category, flavor, value);
+
+		/* Release the reference handed back by the lookup */
+		thread_deallocate(target);
+	}
+}
+
+/*
+ * Start a thread policy transition on `thread`.
+ * This path supports networking transitions (i.e. darwinbg transitions).
+ *
+ * The requested policy is changed under the thread mutex; whatever the
+ * update pended is then completed once the mutex has been released.
+ *
+ * Precondition: No locks are held
+ */
+void
+proc_set_thread_policy(thread_t thread,
+    int category,
+    int flavor,
+    int value)
+{
+	struct task_pend_token deferred = {};
+
+	/* Requested and effective policy are updated with the thread mutex held */
+	thread_mtx_lock(thread);
+	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &deferred);
+	thread_mtx_unlock(thread);
+
+	/* Run the pended follow-up actions without the mutex */
+	thread_policy_update_complete_unlocked(thread, &deferred);
+}
+
+/*
+ * Carry out the follow-up actions recorded in `pend_token`, i.e. the work
+ * that cannot be done while a thread mutex is held.
+ *
+ * Each action calls back into thread policy to fetch the latest value, so
+ * it needs no synchronization with the update itself. The only required
+ * semantic is 'call this sometime after updating effective policy'.
+ *
+ * Precondition: Thread mutex is not held
+ *
+ * This may be called with the task lock held, but in that case it won't be
+ * called with tpt_update_sockets set.
+ */
+void
+thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
+{
+#ifdef MACH_BSD
+	/* Socket background state */
+	if (pend_token->tpt_update_sockets != 0) {
+		proc_apply_task_networkbg(thread->task->bsd_info, thread);
+	}
+#endif /* MACH_BSD */
+
+	/* I/O throttle */
+	if (pend_token->tpt_update_throttle != 0) {
+		rethrottle_thread(thread->uthread);
+	}
+
+	/* SFI classification */
+	if (pend_token->tpt_update_thread_sfi != 0) {
+		sfi_reevaluate(thread);
+	}
+
+	/* Turnstile priority chain */
+	if (pend_token->tpt_update_turnstile != 0) {
+		turnstile_update_thread_priority_chain(thread);
+	}
+}
+
+/*
+ * Set and update thread policy.
+ *
+ * Raises to splsched and takes the thread spinlock around the
+ * spinlocked variant, then restores the previous level.
+ * Thread mutex might be held.
+ */
+static void
+proc_set_thread_policy_locked(thread_t thread,
+    int category,
+    int flavor,
+    int value,
+    int value2,
+    task_pend_token_t pend_token)
+{
+	spl_t saved_spl;
+
+	saved_spl = splsched();
+	thread_lock(thread);
+
+	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
+
+	thread_unlock(thread);
+	splx(saved_spl);
+}
+
+/*
+ * Set and update thread policy.
+ *
+ * Stores the new requested value for `flavor`, then recomputes the
+ * effective policy (without forcing a priority recompute). The pair of
+ * operations is bracketed by start/end trace points; the end point also
+ * logs the pending token.
+ *
+ * Thread spinlock is held
+ */
+static void
+proc_set_thread_policy_spinlocked(thread_t thread,
+    int category,
+    int flavor,
+    int value,
+    int value2,
+    task_pend_token_t pend_token)
+{
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
+	    thread_tid(thread),
+	    threquested_0(thread),
+	    threquested_1(thread),
+	    value,
+	    0);
+
+	/* First record what was asked for ... */
+	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
+
+	/* ... then derive what is actually in effect */
+	thread_policy_update_spinlocked(thread, FALSE, pend_token);
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
+	    thread_tid(thread),
+	    threquested_0(thread),
+	    threquested_1(thread),
+	    tpending(pend_token),
+	    0);
+}
+
+/*
+ * Store `value` (and, for TASK_POLICY_QOS_AND_RELPRIO, `value2`) as the
+ * requested state of one policy flavor.
+ *
+ * The requested policy is edited on a local copy and written back to the
+ * thread in one assignment at the end. The QoS, QoS override, workq,
+ * kevent and servicer override flavors also set tpt_update_turnstile in
+ * `pend_token`. An unknown flavor panics.
+ */
+static void
+thread_set_requested_policy_spinlocked(thread_t thread,
+    int category,
+    int flavor,
+    int value,
+    int value2,
+    task_pend_token_t pend_token)
+{
+	const boolean_t is_external = (category == TASK_POLICY_EXTERNAL);
+	struct thread_requested_policy updated = thread->requested_policy;
+
+	switch (flavor) {
+	/* Category: EXTERNAL and INTERNAL, thread and task */
+
+	case TASK_POLICY_DARWIN_BG:
+		if (is_external) {
+			updated.thrp_ext_darwinbg = value;
+		} else {
+			updated.thrp_int_darwinbg = value;
+		}
+		break;
+
+	case TASK_POLICY_IOPOL: {
+		int io_tier, io_passive;
+
+		/* An I/O policy value decomposes into a tier plus a passive flag */
+		proc_iopol_to_tier(value, &io_tier, &io_passive);
+		if (is_external) {
+			updated.thrp_ext_iotier = io_tier;
+			updated.thrp_ext_iopassive = io_passive;
+		} else {
+			updated.thrp_int_iotier = io_tier;
+			updated.thrp_int_iopassive = io_passive;
+		}
+		break;
+	}
+
+	case TASK_POLICY_IO:
+		if (is_external) {
+			updated.thrp_ext_iotier = value;
+		} else {
+			updated.thrp_int_iotier = value;
+		}
+		break;
+
+	case TASK_POLICY_PASSIVE_IO:
+		if (is_external) {
+			updated.thrp_ext_iopassive = value;
+		} else {
+			updated.thrp_int_iopassive = value;
+		}
+		break;
+
+	/* Category: ATTRIBUTE, thread only */
+
+	case TASK_POLICY_PIDBIND_BG:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_pidbind_bg = value;
+		break;
+
+	case TASK_POLICY_LATENCY_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_latency_qos = value;
+		break;
+
+	case TASK_POLICY_THROUGH_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_through_qos = value;
+		break;
+
+	case TASK_POLICY_QOS_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_qos_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_QOS_AND_RELPRIO:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_qos = value;
+		updated.thrp_qos_relprio = value2;
+		pend_token->tpt_update_turnstile = 1;
+		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, updated.thrp_qos, int, updated.thrp_qos_relprio);
+		break;
+
+	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_qos_workq_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_QOS_PROMOTE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_qos_promote = value;
+		break;
+
+	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_qos_kevent_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_qos_wlsvc_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_TERMINATED:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		updated.thrp_terminated = value;
+		break;
+
+	default:
+		panic("unknown task policy: %d %d %d", category, flavor, value);
+		break;
+	}
+
+	/* Publish the edited copy */
+	thread->requested_policy = updated;
+}
+
+/*
+ * Return the requested (not the effective) value of one policy flavor;
+ * the value in effect may differ from what was set.
+ *
+ * The lookup runs under the thread mutex and passes no second-value
+ * out-parameter to the locked variant.
+ *
+ * Precondition: No locks are held
+ */
+int
+proc_get_thread_policy(thread_t thread,
+    int category,
+    int flavor)
+{
+	int requested_value;
+
+	thread_mtx_lock(thread);
+	requested_value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
+	thread_mtx_unlock(thread);
+
+	return requested_value;
+}
+
+/*
+ * Read the requested value of one policy flavor with the thread spinlock
+ * taken at splsched. `value2` is forwarded unchanged to the spinlocked
+ * getter, which uses it as a second out-parameter for some flavors.
+ */
+static int
+proc_get_thread_policy_locked(thread_t thread,
+    int category,
+    int flavor,
+    int* value2)
+{
+	spl_t saved_spl = splsched();
+	thread_lock(thread);
+
+	const int requested_value =
+	    thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
+
+	thread_unlock(thread);
+	splx(saved_spl);
+
+	return requested_value;
+}
+
+/*
+ * Return the requested value of one policy flavor, read from a snapshot
+ * of thread->requested_policy. Effective values may be different.
+ *
+ * For flavors that exist in both an external and an internal variant,
+ * `category` picks the variant. TASK_POLICY_QOS_AND_RELPRIO returns the
+ * QoS and stores the relative priority through `value2`, which is
+ * asserted to be non-NULL. An unknown flavor panics.
+ */
+static int
+thread_get_requested_policy_spinlocked(thread_t thread,
+    int category,
+    int flavor,
+    int* value2)
+{
+	const boolean_t is_external = (category == TASK_POLICY_EXTERNAL);
+	struct thread_requested_policy snapshot = thread->requested_policy;
+	int value = 0;
+
+	switch (flavor) {
+	case TASK_POLICY_DARWIN_BG:
+		value = is_external ? snapshot.thrp_ext_darwinbg : snapshot.thrp_int_darwinbg;
+		break;
+	case TASK_POLICY_IOPOL:
+		/* Recombine tier and passive flag into a single I/O policy value */
+		value = is_external
+		    ? proc_tier_to_iopol(snapshot.thrp_ext_iotier, snapshot.thrp_ext_iopassive)
+		    : proc_tier_to_iopol(snapshot.thrp_int_iotier, snapshot.thrp_int_iopassive);
+		break;
+	case TASK_POLICY_IO:
+		value = is_external ? snapshot.thrp_ext_iotier : snapshot.thrp_int_iotier;
+		break;
+	case TASK_POLICY_PASSIVE_IO:
+		value = is_external ? snapshot.thrp_ext_iopassive : snapshot.thrp_int_iopassive;
+		break;
+	case TASK_POLICY_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_qos;
+		break;
+	case TASK_POLICY_QOS_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_qos_override;
+		break;
+	case TASK_POLICY_LATENCY_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_latency_qos;
+		break;
+	case TASK_POLICY_THROUGH_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_through_qos;
+		break;
+	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_qos_workq_override;
+		break;
+	case TASK_POLICY_QOS_AND_RELPRIO:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		assert(value2 != NULL);
+		value = snapshot.thrp_qos;
+		*value2 = snapshot.thrp_qos_relprio;
+		break;
+	case TASK_POLICY_QOS_PROMOTE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_qos_promote;
+		break;
+	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_qos_kevent_override;
+		break;
+	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_qos_wlsvc_override;
+		break;
+	case TASK_POLICY_TERMINATED:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = snapshot.thrp_terminated;
+		break;
+
+	default:
+		panic("unknown policy_flavor %d", flavor);
+		break;
+	}
+
+	return value;
+}
+
+/*
+ * Report what is actually in effect, for subsystems which pull policy
+ * instead of receiving updates.
+ *
+ * NOTE: This accessor does not take the task or thread lock.
+ * Notifications of state updates need to be externally synchronized with state queries.
+ * This routine *MUST* remain interrupt safe, as it is potentially invoked
+ * within the context of a timer interrupt.
+ *
+ * TODO: It may be possible to architect this so that the task never has
+ * to be consulted. Whether that is worthwhile is open: it would probably
+ * require evaluate-all-the-threads updates, and avoiding those is not
+ * worth returning the wrong answer.
+ *
+ * Returns the effective value for `flavor`; an unknown flavor panics.
+ */
+int
+proc_get_effective_thread_policy(thread_t thread,
+    int flavor)
+{
+	switch (flavor) {
+	case TASK_POLICY_DARWIN_BG:
+		/*
+		 * Used within the timer layer and when prioritizing requests
+		 * to the graphics system; also informs SFI and
+		 * originator-bg-state.
+		 * 1 means background mode, 0 means normal mode.
+		 */
+		return thread->effective_policy.thep_darwinbg ? 1 : 0;
+
+	case TASK_POLICY_IO: {
+		/*
+		 * Queried by the I/O system to pick the throttling tier for
+		 * an operation. Yields THROTTLE_LEVEL_* values. An active
+		 * iotier override can only lower the tier number.
+		 */
+		int io_tier = thread->effective_policy.thep_io_tier;
+
+		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
+			io_tier = MIN(io_tier, thread->iotier_override);
+		}
+		return io_tier;
+	}
+
+	case TASK_POLICY_PASSIVE_IO: {
+		/*
+		 * Queried by the I/O system to decide whether an operation is
+		 * passive (i.e. does not cause operations with lower throttle
+		 * tiers to be throttled). 1 means passive, 0 means normal.
+		 *
+		 * If an override is causing IO to go into a lower tier, the
+		 * passive bit is reported as well so that a thread doesn't end
+		 * up stuck in its own throttle window when the override goes
+		 * away.
+		 */
+		int io_passive = thread->effective_policy.thep_io_passive ? 1 : 0;
+
+		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
+		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
+			io_passive = 1;
+		}
+		return io_passive;
+	}
+
+	case TASK_POLICY_ALL_SOCKETS_BG:
+		/*
+		 * do_background_socket() asks this to decide whether it
+		 * should change the thread's sockets.
+		 * 1 means background mode, 0 means normal mode.
+		 * Both thread and task are consulted so that un-DBGing a
+		 * thread while the task is BG doesn't get you out of the
+		 * network throttle.
+		 */
+		return (thread->effective_policy.thep_all_sockets_bg ||
+		       thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
+
+	case TASK_POLICY_NEW_SOCKETS_BG:
+		/*
+		 * socreate() asks this to decide whether a new socket should
+		 * be marked as background.
+		 * 1 means background mode, 0 means normal mode.
+		 */
+		return thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
+
+	case TASK_POLICY_LATENCY_QOS:
+		/*
+		 * Timer arming asks this for the timer coalescing level.
+		 * Yields a latency QoS tier (0-6).
+		 */
+		return thread->effective_policy.thep_latency_qos;
+
+	case TASK_POLICY_THROUGH_QOS:
+		/*
+		 * Passed into the urgency callout from the scheduler to the
+		 * performance management subsystem.
+		 * Yields a throughput QoS tier (0-6).
+		 */
+		return thread->effective_policy.thep_through_qos;
+
+	case TASK_POLICY_QOS:
+		/*
+		 * Communicated to the performance management layer and SFI.
+		 * Yields a QoS policy tier.
+		 */
+		return thread->effective_policy.thep_qos;
+
+	default:
+		panic("unknown thread policy flavor %d", flavor);
+		break;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Pack the thread's requested and effective policy into the bitfield
+ * layout of `info`; info->pending is always reported as 0.
+ *
+ * The bits are assembled in 64-bit accumulators, but the (integer_t)
+ * casts on the final stores limit how many of them fit.
+ * This interface is deprecated and replaced by the _EXT struct ?
+ */
+static void
+proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
+{
+	/* ---- requested policy ---- */
+	struct thread_requested_policy req = thread->requested_policy;
+	uint64_t req_bits = 0;
+
+	if (req.thrp_int_darwinbg) {
+		req_bits |= POLICY_REQ_INT_DARWIN_BG;
+	}
+	if (req.thrp_ext_darwinbg) {
+		req_bits |= POLICY_REQ_EXT_DARWIN_BG;
+	}
+	/* A zero field shifts to zero, so multi-bit fields are OR-ed in unconditionally */
+	req_bits |= ((uint64_t)req.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT;
+	req_bits |= ((uint64_t)req.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT;
+	if (req.thrp_int_iopassive) {
+		req_bits |= POLICY_REQ_INT_PASSIVE_IO;
+	}
+	if (req.thrp_ext_iopassive) {
+		req_bits |= POLICY_REQ_EXT_PASSIVE_IO;
+	}
+
+	req_bits |= ((uint64_t)req.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT;
+	req_bits |= ((uint64_t)req.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT;
+
+	if (req.thrp_pidbind_bg) {
+		req_bits |= POLICY_REQ_PIDBIND_BG;
+	}
+
+	req_bits |= ((uint64_t)req.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT;
+	req_bits |= ((uint64_t)req.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT;
+
+	info->requested = (integer_t) req_bits;
+
+	/* ---- effective policy ---- */
+	struct thread_effective_policy eff = thread->effective_policy;
+	uint64_t eff_bits = 0;
+
+	if (eff.thep_darwinbg) {
+		eff_bits |= POLICY_EFF_DARWIN_BG;
+	}
+
+	eff_bits |= ((uint64_t)eff.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT;
+	if (eff.thep_io_passive) {
+		eff_bits |= POLICY_EFF_IO_PASSIVE;
+	}
+	if (eff.thep_all_sockets_bg) {
+		eff_bits |= POLICY_EFF_ALL_SOCKETS_BG;
+	}
+	if (eff.thep_new_sockets_bg) {
+		eff_bits |= POLICY_EFF_NEW_SOCKETS_BG;
+	}
+
+	eff_bits |= ((uint64_t)eff.thep_qos) << POLICY_EFF_TH_QOS_SHIFT;
+
+	eff_bits |= ((uint64_t)eff.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT;
+	eff_bits |= ((uint64_t)eff.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT;
+
+	info->effective = (integer_t)eff_bits;
+
+	/* ---- pending ---- */
+	info->pending = 0;
+}
+
+/*
+ * Sneakily trace either the task and thread requested policy,
+ * or just the thread requested policy, depending on whether we have enough room.
+ * We do have room on LP64. On LP32, the thread policy has to be split
+ * between two uintptr_t's, which leaves no room for the task policy.
+ *
+ *                          LP32            LP64
+ * threquested_0(thread)    thread[0]       thread[0]
+ * threquested_1(thread)    thread[1]       task[0]
+ *
+ */
+
+/*
+ * First pointer-sized word of the thread's requested policy, for tracing.
+ * The requested policy struct is statically asserted to be 64 bits wide.
+ */
+uintptr_t
+threquested_0(thread_t thread)
+{
+	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
+
+	uintptr_t *policy_words = (uintptr_t *)(void *)&thread->requested_policy;
+
+	return policy_words[0];
+}
+
+/*
+ * Second trace word for the requested policy: on LP64 the first word of
+ * the owning task's requested policy, otherwise the second word of the
+ * thread's own requested policy.
+ */
+uintptr_t
+threquested_1(thread_t thread)
+{
+#if defined(__LP64__)
+	uintptr_t *task_words = (uintptr_t *)&thread->task->requested_policy;
+
+	return task_words[0];
+#else
+	uintptr_t *policy_words = (uintptr_t *)(void *)&thread->requested_policy;
+
+	return policy_words[1];
+#endif
+}
+
+/*
+ * First pointer-sized word of the thread's effective policy, for tracing.
+ * The effective policy struct is statically asserted to be 64 bits wide.
+ */
+uintptr_t
+theffective_0(thread_t thread)
+{
+	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
+
+	uintptr_t *policy_words = (uintptr_t *)(void *)&thread->effective_policy;
+
+	return policy_words[0];
+}
+
+/*
+ * Second trace word for the effective policy: on LP64 the first word of
+ * the owning task's effective policy, otherwise the second word of the
+ * thread's own effective policy.
+ */
+uintptr_t
+theffective_1(thread_t thread)
+{
+#if defined(__LP64__)
+	uintptr_t *task_words = (uintptr_t *)&thread->task->effective_policy;
+
+	return task_words[0];
+#else
+	uintptr_t *policy_words = (uintptr_t *)(void *)&thread->effective_policy;
+
+	return policy_words[1];
+#endif
+}
+
+
+/*
+ * Install an I/O tier override on the thread. The override is consulted
+ * with a higher priority than the task/thread policy and should only be
+ * used for temporary grants until the thread returns to the userspace
+ * boundary.
+ *
+ * The override is swapped in with a compare-and-swap, with the
+ * assumption that the thread itself can read the override and clear it
+ * on return to userspace. When an override is already present, the
+ * numerically smaller of the existing and the new value is kept; if that
+ * leaves the stored value unchanged, the function returns without
+ * rethrottling.
+ *
+ * No locking is performed, since it is acceptable to see
+ * a stale override for one loop through throttle_lowpri_io().
+ * However a thread reference must be held on the thread.
+ */
+
+void
+set_thread_iotier_override(thread_t thread, int policy)
+{
+	/* Let most aggressive I/O policy win until user boundary */
+	for (;;) {
+		int observed = thread->iotier_override;
+
+		if (observed != THROTTLE_LEVEL_NONE) {
+			policy = MIN(observed, policy);
+		}
+
+		if (observed == policy) {
+			/* Stored value would not change: nothing to do */
+			return;
+		}
+
+		if (OSCompareAndSwap(observed, policy, &thread->iotier_override)) {
+			break;
+		}
+		/* Lost a race with another update: re-read and retry */
+	}
+
+	/*
+	 * Since the thread may be currently throttled,
+	 * re-evaluate tiers and potentially break out
+	 * of an msleep
+	 */
+	rethrottle_thread(thread->uthread);
+}
+
+/*
+ * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
+ * semaphores, dispatch_sync) may result in priority inversions where a higher priority
+ * thread (in terms of scheduler priority, I/O tier, or QoS tier) is waiting on a resource
+ * owned by a lower priority thread. In these cases, we attempt to propagate the priority
+ * token, as long as the subsystem informs us of the relationships between the threads.
+ * The userspace synchronization subsystem should maintain the information of
+ * owner->resource and resource->waiters itself.
+ */
+
+/*
+ * Rewrite *resource / *resource_type in place into the canonical form
+ * used under the qos_override_mode currently in effect.
+ *
+ * Wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need special
+ * handling in the future, but for now it's fine to slam *resource to
+ * USER_ADDR_NULL even if it was previously a wildcard.
+ */
+static void
+canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
+{
+	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK ||
+	    qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
+		/* Every input collapses onto one resource/type pair */
+		*resource = USER_ADDR_NULL;
+		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
+		return;
+	}
+
+	if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
+		/* Inputs are used as given */
+		return;
+	}
+
+	if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE &&
+	    *resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
+		/* All mutex overrides share one entry, to avoid memory overhead */
+		*resource = USER_ADDR_NULL;
+	}
+}
+
+/*
+ * Walk the thread's override list and return the first entry whose
+ * resource and resource type both equal the arguments, or NULL when there
+ * is no such entry. Locking should be done by caller.
+ */
+static struct thread_qos_override *
+find_qos_override(thread_t thread,
+    user_addr_t resource,
+    int resource_type)
+{
+	struct thread_qos_override *entry;
+
+	for (entry = thread->overrides; entry != NULL; entry = entry->override_next) {
+		if (entry->override_resource == resource &&
+		    entry->override_resource_type == resource_type) {
+			return entry;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Drop one contended-resource count (or, with `reset`, all of them) from
+ * the override entries matching `resource` / `resource_type`, where either
+ * argument may be its wildcard value.
+ *
+ * Entries whose count reaches zero are unlinked from thread->overrides
+ * and pushed onto *free_override_list for the caller to zfree later.
+ * Unless `resource` is the wildcard, processing stops after the first
+ * matching entry.
+ */
+static void
+find_and_decrement_qos_override(thread_t thread,
+    user_addr_t resource,
+    int resource_type,
+    boolean_t reset,
+    struct thread_qos_override **free_override_list)
+{
+	const boolean_t any_resource = (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource);
+	const boolean_t any_type = (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type);
+
+	/* `link` always addresses the pointer that leads to the entry under inspection */
+	struct thread_qos_override **link = &thread->overrides;
+
+	while (*link != NULL) {
+		struct thread_qos_override *entry = *link;
+
+		if (!((any_resource || entry->override_resource == resource) &&
+		    (any_type || entry->override_resource_type == resource_type))) {
+			/* Not a match: step over it */
+			link = &entry->override_next;
+			continue;
+		}
+
+		if (reset) {
+			entry->override_contended_resource_count = 0;
+		} else {
+			entry->override_contended_resource_count--;
+		}
+
+		if (entry->override_contended_resource_count == 0) {
+			/* Unlink; `link` now leads to the successor */
+			*link = entry->override_next;
+
+			/* Add to out-param for later zfree */
+			entry->override_next = *free_override_list;
+			*free_override_list = entry;
+		} else {
+			link = &entry->override_next;
+		}
+
+		if (!any_resource) {
+			return;
+		}
+	}
+}
+
+/*
+ * Recompute the current requested override according to the
+ * qos_override_mode policy selected at boot: THREAD_QOS_UNSPECIFIED when
+ * overrides are ignored, otherwise the MAX of override_qos over all
+ * entries in the thread's override list.
+ */
+static int
+calculate_requested_qos_override(thread_t thread)
+{
+	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
+		return THREAD_QOS_UNSPECIFIED;
+	}
+
+	int highest = THREAD_QOS_UNSPECIFIED;
+	struct thread_qos_override *entry;
+
+	for (entry = thread->overrides; entry != NULL; entry = entry->override_next) {
+		highest = MAX(highest, entry->override_qos);
+	}
+
+	return highest;
+}
+
+/*
+ * Record a QoS override of override_qos for (resource, resource_type) on
+ * the thread, then recompute the combined override and, if it changed,
+ * store it as the thread's requested TASK_POLICY_QOS_OVERRIDE.
+ *
+ * The resource/type pair is canonicalized first. When
+ * first_override_for_resource is set, a matching entry has its contended
+ * resource count incremented, or a new entry is allocated if none exists.
+ * The thread mutex is dropped around that allocation.
+ *
+ * Returns:
+ * - 0 on success
+ * NOTE(review): an EINVAL return used to be documented here, but rc is
+ * never assigned after its initialization to 0, so only 0 is returned.
+ */
+static int
+proc_thread_qos_add_override_internal(thread_t thread,
+ int override_qos,
+ boolean_t first_override_for_resource,
+ user_addr_t resource,
+ int resource_type)
+{
+ struct task_pend_token pend_token = {};
+ int rc = 0;
+
+ thread_mtx_lock(thread);
+
+ KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
+ thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
+
+ DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
+ uint64_t, thread->requested_policy.thrp_qos,
+ uint64_t, thread->effective_policy.thep_qos,
+ int, override_qos, boolean_t, first_override_for_resource);
+
+ struct thread_qos_override *override;
+ struct thread_qos_override *override_new = NULL;
+ int new_qos_override, prev_qos_override;
+ int new_effective_qos;
+
+ canonicalize_resource_and_type(&resource, &resource_type);
+
+ override = find_qos_override(thread, resource, resource_type);
+ if (first_override_for_resource && !override) {
+ /* We need to allocate a new object. Drop the thread mutex and
+ * recheck afterwards in case someone else added the override
+ */
+ thread_mtx_unlock(thread);
+ override_new = zalloc(thread_qos_override_zone);
+ thread_mtx_lock(thread);
+ override = find_qos_override(thread, resource, resource_type);
+ }
+ if (first_override_for_resource && override) {
+ /* An entry already exists: found on the first lookup, or added by someone else while the thread mutex was dropped */
+ override->override_contended_resource_count++;
+ } else if (!override && override_new) {
+ override = override_new;
+ override_new = NULL; /* ownership moves to the thread's list; nothing to free below */
+ override->override_next = thread->overrides;
+ /* since first_override_for_resource was TRUE */
+ override->override_contended_resource_count = 1;
+ override->override_resource = resource;
+ override->override_resource_type = resource_type;
+ override->override_qos = THREAD_QOS_UNSPECIFIED;
+ thread->overrides = override;
+ }
+
+ if (override) { /* still NULL when no entry exists and first_override_for_resource is FALSE */
+ if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
+ override->override_qos = override_qos;
+ } else {
+ override->override_qos = MAX(override->override_qos, override_qos);
+ }
+ }
+
+ /* Determine how to combine the various overrides into a single current
+ * requested override
+ */
+ new_qos_override = calculate_requested_qos_override(thread);
+
+ prev_qos_override = proc_get_thread_policy_locked(thread,
+ TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
+
+ if (new_qos_override != prev_qos_override) {
+ proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+ TASK_POLICY_QOS_OVERRIDE,
+ new_qos_override, 0, &pend_token);
+ }
+
+ new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
+
+ thread_mtx_unlock(thread);
+
+ thread_policy_update_complete_unlocked(thread, &pend_token);
+
+ if (override_new) { /* allocated above but not inserted: an entry had appeared by the recheck */
+ zfree(thread_qos_override_zone, override_new);
+ }
+
+ DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
+ int, new_qos_override, int, new_effective_qos, int, rc);
+
+ KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
+ new_qos_override, resource, resource_type, 0, 0);
+
+ return rc;
+}
+
+int
+proc_thread_qos_add_override(task_t task,
+ thread_t thread,
+ uint64_t tid,
+ int override_qos,
+ boolean_t first_override_for_resource,
+ user_addr_t resource,
+ int resource_type)
+{
+ boolean_t has_thread_reference = FALSE;
+ int rc = 0;
+
+ if (thread == THREAD_NULL) {
+ thread = task_findtid(task, tid);
+ /* returns referenced thread */
+
+ if (thread == THREAD_NULL) {
+ KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
+ tid, 0, 0xdead, 0, 0);
+ return ESRCH;