]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/thread_policy.c
xnu-6153.61.1.tar.gz
[apple/xnu.git] / osfmk / kern / thread_policy.c
index f30639e47e5244ab53d027e4a85fa05f9fd4edcd..3ba515bef0387a9a8980d659e2f92e2ce26c464b 100644 (file)
@@ -53,13 +53,12 @@ uint32_t qos_override_mode;
 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
-#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH 3
-#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 4
+#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
 
 extern zone_t thread_qos_override_zone;
 
-static boolean_t
-proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, boolean_t squash);
+static void
+proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
 
 /*
  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
@@ -129,7 +128,7 @@ static void
 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 
 static void
-thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);
+thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 
 static int
 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
@@ -144,7 +143,8 @@ static void
 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
 
 void
-thread_policy_init(void) {
+thread_policy_init(void)
+{
        if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
                printf("QOS override mode: 0x%08x\n", qos_override_mode);
        } else {
@@ -153,22 +153,22 @@ thread_policy_init(void) {
 }
 
 boolean_t
-thread_has_qos_policy(thread_t thread) {
+thread_has_qos_policy(thread_t thread)
+{
        return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
 }
 
 
 static void
 thread_remove_qos_policy_locked(thread_t thread,
-                                task_pend_token_t pend_token)
+    task_pend_token_t pend_token)
 {
-
        __unused int prev_qos = thread->requested_policy.thrp_qos;
 
        DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
 
        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
-                                     THREAD_QOS_UNSPECIFIED, 0, pend_token);
+           THREAD_QOS_UNSPECIFIED, 0, pend_token);
 }
 
 kern_return_t
@@ -213,28 +213,30 @@ thread_qos_scaled_relative_priority(int qos, int qos_relprio)
        int next_lower_qos;
 
        /* Fast path, since no validation or scaling is needed */
-       if (qos_relprio == 0) return 0;
+       if (qos_relprio == 0) {
+               return 0;
+       }
 
        switch (qos) {
-               case THREAD_QOS_USER_INTERACTIVE:
-                       next_lower_qos = THREAD_QOS_USER_INITIATED;
-                       break;
-               case THREAD_QOS_USER_INITIATED:
-                       next_lower_qos = THREAD_QOS_LEGACY;
-                       break;
-               case THREAD_QOS_LEGACY:
-                       next_lower_qos = THREAD_QOS_UTILITY;
-                       break;
-               case THREAD_QOS_UTILITY:
-                       next_lower_qos = THREAD_QOS_BACKGROUND;
-                       break;
-               case THREAD_QOS_MAINTENANCE:
-               case THREAD_QOS_BACKGROUND:
-                       next_lower_qos = 0;
-                       break;
-               default:
-                       panic("Unrecognized QoS %d", qos);
-                       return 0;
+       case THREAD_QOS_USER_INTERACTIVE:
+               next_lower_qos = THREAD_QOS_USER_INITIATED;
+               break;
+       case THREAD_QOS_USER_INITIATED:
+               next_lower_qos = THREAD_QOS_LEGACY;
+               break;
+       case THREAD_QOS_LEGACY:
+               next_lower_qos = THREAD_QOS_UTILITY;
+               break;
+       case THREAD_QOS_UTILITY:
+               next_lower_qos = THREAD_QOS_BACKGROUND;
+               break;
+       case THREAD_QOS_MAINTENANCE:
+       case THREAD_QOS_BACKGROUND:
+               next_lower_qos = 0;
+               break;
+       default:
+               panic("Unrecognized QoS %d", qos);
+               return 0;
        }
 
        int prio_range_max = thread_qos_policy_params.qos_pri[qos];
@@ -260,25 +262,28 @@ boolean_t allow_qos_policy_set = FALSE;
 
 kern_return_t
 thread_policy_set(
-       thread_t                                thread,
-       thread_policy_flavor_t  flavor,
-       thread_policy_t                 policy_info,
-       mach_msg_type_number_t  count)
+       thread_t                                thread,
+       thread_policy_flavor_t  flavor,
+       thread_policy_t                 policy_info,
+       mach_msg_type_number_t  count)
 {
        thread_qos_policy_data_t req_qos;
        kern_return_t kr;
-       
+
        req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
 
-       if (thread == THREAD_NULL)
-               return (KERN_INVALID_ARGUMENT);
+       if (thread == THREAD_NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
 
        if (allow_qos_policy_set == FALSE) {
-               if (thread_is_static_param(thread))
-                       return (KERN_POLICY_STATIC);
+               if (thread_is_static_param(thread)) {
+                       return KERN_POLICY_STATIC;
+               }
 
-               if (flavor == THREAD_QOS_POLICY)
-                       return (KERN_INVALID_ARGUMENT);
+               if (flavor == THREAD_QOS_POLICY) {
+                       return KERN_INVALID_ARGUMENT;
+               }
        }
 
        /* Threads without static_param set reset their QoS when other policies are applied. */
@@ -308,10 +313,10 @@ thread_policy_set(
 
 kern_return_t
 thread_policy_set_internal(
-                           thread_t                     thread,
-                           thread_policy_flavor_t       flavor,
-                           thread_policy_t              policy_info,
-                           mach_msg_type_number_t       count)
+       thread_t                     thread,
+       thread_policy_flavor_t       flavor,
+       thread_policy_t              policy_info,
+       mach_msg_type_number_t       count)
 {
        kern_return_t result = KERN_SUCCESS;
        struct task_pend_token pend_token = {};
@@ -320,11 +325,10 @@ thread_policy_set_internal(
        if (!thread->active) {
                thread_mtx_unlock(thread);
 
-               return (KERN_TERMINATED);
+               return KERN_TERMINATED;
        }
 
        switch (flavor) {
-
        case THREAD_EXTENDED_POLICY:
        {
                boolean_t timeshare = TRUE;
@@ -361,9 +365,9 @@ thread_policy_set_internal(
                }
 
                info = (thread_time_constraint_policy_t)policy_info;
-               if (info->constraint  < info->computation   ||
-                   info->computation > max_rt_quantum      ||
-                   info->computation < min_rt_quantum      ) {
+               if (info->constraint < info->computation ||
+                   info->computation > max_rt_quantum ||
+                   info->computation < min_rt_quantum) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }
@@ -433,6 +437,38 @@ thread_policy_set_internal(
                return thread_affinity_set(thread, info->affinity_tag);
        }
 
+#if CONFIG_EMBEDDED
+       case THREAD_BACKGROUND_POLICY:
+       {
+               thread_background_policy_t info;
+
+               if (count < THREAD_BACKGROUND_POLICY_COUNT) {
+                       result = KERN_INVALID_ARGUMENT;
+                       break;
+               }
+
+               if (thread->task != current_task()) {
+                       result = KERN_PROTECTION_FAILURE;
+                       break;
+               }
+
+               info = (thread_background_policy_t) policy_info;
+
+               int enable;
+
+               if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
+                       enable = TASK_POLICY_ENABLE;
+               } else {
+                       enable = TASK_POLICY_DISABLE;
+               }
+
+               int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
+
+               proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
+
+               break;
+       }
+#endif /* CONFIG_EMBEDDED */
 
        case THREAD_THROUGHPUT_QOS_POLICY:
        {
@@ -444,13 +480,14 @@ thread_policy_set_internal(
                        break;
                }
 
-               if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS)
+               if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
                        break;
+               }
 
                tqos = qos_extract(info->thread_throughput_qos_tier);
 
                proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
-                                             TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
+                   TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
 
                break;
        }
@@ -465,13 +502,14 @@ thread_policy_set_internal(
                        break;
                }
 
-               if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS)
+               if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
                        break;
+               }
 
                lqos = qos_extract(info->thread_latency_qos_tier);
 
                proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
-                                             TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
+                   TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
 
                break;
        }
@@ -501,7 +539,7 @@ thread_policy_set_internal(
                }
 
                proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
-                                             info->qos_tier, -info->tier_importance, &pend_token);
+                   info->qos_tier, -info->tier_importance, &pend_token);
 
                break;
        }
@@ -515,7 +553,7 @@ thread_policy_set_internal(
 
        thread_policy_update_complete_unlocked(thread, &pend_token);
 
-       return (result);
+       return result;
 }
 
 /*
@@ -523,16 +561,17 @@ thread_policy_set_internal(
  * Both result in FIXED mode scheduling.
  */
 static sched_mode_t
-convert_policy_to_sched_mode(integer_t policy) {
+convert_policy_to_sched_mode(integer_t policy)
+{
        switch (policy) {
-               case POLICY_TIMESHARE:
-                       return TH_MODE_TIMESHARE;
-               case POLICY_RR:
-               case POLICY_FIFO:
-                       return TH_MODE_FIXED;
-               default:
-                       panic("unexpected sched policy: %d", policy);
-                       return TH_MODE_NONE;
+       case POLICY_TIMESHARE:
+               return TH_MODE_TIMESHARE;
+       case POLICY_RR:
+       case POLICY_FIFO:
+               return TH_MODE_FIXED;
+       default:
+               panic("unexpected sched policy: %d", policy);
+               return TH_MODE_NONE;
        }
 }
 
@@ -542,9 +581,9 @@ convert_policy_to_sched_mode(integer_t policy) {
  */
 static kern_return_t
 thread_set_mode_and_absolute_pri_internal(thread_t              thread,
-                                          sched_mode_t          mode,
-                                          integer_t             priority,
-                                          task_pend_token_t     pend_token)
+    sched_mode_t          mode,
+    integer_t             priority,
+    task_pend_token_t     pend_token)
 {
        kern_return_t kr = KERN_SUCCESS;
 
@@ -572,28 +611,31 @@ thread_set_mode_and_absolute_pri_internal(thread_t              thread,
         * TODO: Store the absolute priority value instead
         */
 
-       if (priority >= thread->max_priority)
+       if (priority >= thread->max_priority) {
                priority = thread->max_priority - thread->task_priority;
-       else if (priority >= MINPRI_KERNEL)
+       } else if (priority >= MINPRI_KERNEL) {
                priority -=  MINPRI_KERNEL;
-       else if (priority >= MINPRI_RESERVED)
+       } else if (priority >= MINPRI_RESERVED) {
                priority -=  MINPRI_RESERVED;
-       else
+       } else {
                priority -= BASEPRI_DEFAULT;
+       }
 
        priority += thread->task_priority;
 
-       if (priority > thread->max_priority)
+       if (priority > thread->max_priority) {
                priority = thread->max_priority;
-       else if (priority < MINPRI)
+       } else if (priority < MINPRI) {
                priority = MINPRI;
+       }
 
        thread->importance = priority - thread->task_priority;
 
        thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 
-       if (mode != old_mode)
+       if (mode != old_mode) {
                pend_token->tpt_update_thread_sfi = 1;
+       }
 
 unlock:
        thread_unlock(thread);
@@ -602,35 +644,179 @@ unlock:
        return kr;
 }
 
+void
+thread_freeze_base_pri(thread_t thread)
+{
+       assert(thread == current_thread());
+
+       spl_t s = splsched();
+       thread_lock(thread);
+
+       assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
+       thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
+
+       thread_unlock(thread);
+       splx(s);
+}
+
+bool
+thread_unfreeze_base_pri(thread_t thread)
+{
+       assert(thread == current_thread());
+       integer_t base_pri;
+       ast_t ast = 0;
+
+       spl_t s = splsched();
+       thread_lock(thread);
+
+       assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
+       thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
+
+       base_pri = thread->req_base_pri;
+       if (base_pri != thread->base_pri) {
+               /*
+                * This function returns "true" if the base pri change
+                * is the most likely cause for the preemption.
+                */
+               sched_set_thread_base_priority(thread, base_pri);
+               ast = ast_peek(AST_PREEMPT);
+       }
+
+       thread_unlock(thread);
+       splx(s);
+
+       return ast != 0;
+}
+
+uint8_t
+thread_workq_pri_for_qos(thread_qos_t qos)
+{
+       assert(qos < THREAD_QOS_LAST);
+       return (uint8_t)thread_qos_policy_params.qos_pri[qos];
+}
+
+thread_qos_t
+thread_workq_qos_for_pri(int priority)
+{
+       int qos;
+       if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
+               // indicate that workq should map >UI threads to workq's
+               // internal notation for above-UI work.
+               return THREAD_QOS_UNSPECIFIED;
+       }
+       for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
+               // map a given priority up to the next nearest qos band.
+               if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
+                       return qos;
+               }
+       }
+       return THREAD_QOS_MAINTENANCE;
+}
+
 /*
- * KPI for pthread kext
+ * private interface for pthread workqueues
  *
  * Set scheduling policy & absolute priority for thread
- * May be called from waitqueue callout context with spinlocks held
+ * May be called with spinlocks held
  * Thread mutex lock is not held
  */
-kern_return_t
+void
+thread_reset_workq_qos(thread_t thread, uint32_t qos)
+{
+       struct task_pend_token pend_token = {};
+
+       assert(qos < THREAD_QOS_LAST);
+
+       spl_t s = splsched();
+       thread_lock(thread);
+
+       proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+           TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
+       proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+           TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
+           &pend_token);
+
+       assert(pend_token.tpt_update_sockets == 0);
+
+       thread_unlock(thread);
+       splx(s);
+
+       thread_policy_update_complete_unlocked(thread, &pend_token);
+}
+
+/*
+ * private interface for pthread workqueues
+ *
+ * Set scheduling policy & absolute priority for thread
+ * May be called with spinlocks held
+ * Thread mutex lock is held
+ */
+void
+thread_set_workq_override(thread_t thread, uint32_t qos)
+{
+       struct task_pend_token pend_token = {};
+
+       assert(qos < THREAD_QOS_LAST);
+
+       spl_t s = splsched();
+       thread_lock(thread);
+
+       proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+           TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
+
+       assert(pend_token.tpt_update_sockets == 0);
+
+       thread_unlock(thread);
+       splx(s);
+
+       thread_policy_update_complete_unlocked(thread, &pend_token);
+}
+
+/*
+ * private interface for pthread workqueues
+ *
+ * Set scheduling policy & absolute priority for thread
+ * May be called with spinlocks held
+ * Thread mutex lock is not held
+ */
+void
 thread_set_workq_pri(thread_t  thread,
-                     integer_t priority,
-                     integer_t policy)
+    thread_qos_t qos,
+    integer_t priority,
+    integer_t policy)
 {
        struct task_pend_token pend_token = {};
        sched_mode_t mode = convert_policy_to_sched_mode(policy);
 
+       assert(qos < THREAD_QOS_LAST);
        assert(thread->static_param);
-       if (!thread->static_param)
-               return KERN_FAILURE;
+
+       if (!thread->static_param || !thread->active) {
+               return;
+       }
+
+       spl_t s = splsched();
+       thread_lock(thread);
+
+       proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+           TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
+       proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+           TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
+           0, &pend_token);
+
+       thread_unlock(thread);
+       splx(s);
 
        /* Concern: this doesn't hold the mutex... */
-       if (!thread->active)
-               return KERN_TERMINATED;
 
-       kern_return_t kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
+       __assert_only kern_return_t kr;
+       kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
+           &pend_token);
+       assert(kr == KERN_SUCCESS);
 
-       if (pend_token.tpt_update_thread_sfi)
+       if (pend_token.tpt_update_thread_sfi) {
                sfi_reevaluate(thread);
-
-       return kr;
+       }
 }
 
 /*
@@ -643,8 +829,8 @@ thread_set_workq_pri(thread_t  thread,
  */
 kern_return_t
 thread_set_mode_and_absolute_pri(thread_t   thread,
-                                 integer_t  policy,
-                                 integer_t  priority)
+    integer_t  policy,
+    integer_t  priority)
 {
        kern_return_t kr = KERN_SUCCESS;
        struct task_pend_token pend_token = {};
@@ -664,8 +850,9 @@ thread_set_mode_and_absolute_pri(thread_t   thread,
        }
 
        /* Setting legacy policies on threads kills the current QoS */
-       if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED)
+       if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
                thread_remove_qos_policy_locked(thread, &pend_token);
+       }
 
        kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
 
@@ -674,7 +861,7 @@ unlock:
 
        thread_policy_update_complete_unlocked(thread, &pend_token);
 
-       return (kr);
+       return kr;
 }
 
 /*
@@ -688,8 +875,9 @@ unlock:
 static void
 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
 {
-       if (thread->policy_reset)
+       if (thread->policy_reset) {
                return;
+       }
 
        boolean_t removed = thread_run_queue_remove(thread);
 
@@ -698,15 +886,17 @@ thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
         * That way there's zero confusion over which the user wants
         * and which the kernel wants.
         */
-       if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
+       if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
                thread->saved_mode = mode;
-       else
+       } else {
                sched_set_thread_mode(thread, mode);
+       }
 
        thread_recompute_priority(thread);
 
-       if (removed)
+       if (removed) {
                thread_run_queue_reinsert(thread, SCHED_TAILQ);
+       }
 }
 
 /* called at splsched with thread lock locked */
@@ -721,7 +911,7 @@ thread_update_qos_cpu_time_locked(thread_t thread)
         * last context switch (embedded) or last user/kernel boundary transition (desktop)
         * because user_timer and system_timer are only updated then.
         *
-        * TODO: Consider running a thread_timer_event operation here to update it first.
+        * TODO: Consider running a timer_update operation here to update it first.
         *       Maybe doable with interrupts disabled from current thread.
         *       If the thread is on a different core, may not be easy to get right.
         *
@@ -736,17 +926,32 @@ thread_update_qos_cpu_time_locked(thread_t thread)
 
        uint64_t* task_counter = NULL;
 
-       /* Update the task-level qos stats atomically, because we don't have the task lock. */
+       /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
        switch (thread->effective_policy.thep_qos) {
-               case THREAD_QOS_DEFAULT:            task_counter = &task->cpu_time_qos_stats.cpu_time_qos_default;          break;
-               case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_qos_stats.cpu_time_qos_maintenance;      break;
-               case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_qos_stats.cpu_time_qos_background;       break;
-               case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_qos_stats.cpu_time_qos_utility;          break;
-               case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_qos_stats.cpu_time_qos_legacy;           break;
-               case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_qos_stats.cpu_time_qos_user_initiated;   break;
-               case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_qos_stats.cpu_time_qos_user_interactive; break;
-               default:
-                       panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
+       case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
+       case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
+       case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
+       case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
+       case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
+       case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
+       case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
+       default:
+               panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
+       }
+
+       OSAddAtomic64(timer_delta, task_counter);
+
+       /* Update the task-level qos stats atomically, because we don't have the task lock. */
+       switch (thread->requested_policy.thrp_qos) {
+       case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
+       case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
+       case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
+       case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
+       case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
+       case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
+       case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
+       default:
+               panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
        }
 
        OSAddAtomic64(timer_delta, task_counter);
@@ -777,14 +982,18 @@ thread_update_qos_cpu_time(thread_t thread)
  *
  * Called with thread_lock and thread mutex held.
  */
+extern thread_t vm_pageout_scan_thread;
+extern boolean_t vps_dynamic_priority_enabled;
+
 void
 thread_recompute_priority(
-       thread_t                thread)
+       thread_t                thread)
 {
-       integer_t               priority;
+       integer_t               priority;
 
-       if (thread->policy_reset)
+       if (thread->policy_reset) {
                return;
+       }
 
        if (thread->sched_mode == TH_MODE_REALTIME) {
                sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
@@ -810,12 +1019,13 @@ thread_recompute_priority(
 
                priority += qos_scaled_relprio;
        } else {
-               if (thread->importance > MAXPRI)
+               if (thread->importance > MAXPRI) {
                        priority = MAXPRI;
-               else if (thread->importance < -MAXPRI)
+               } else if (thread->importance < -MAXPRI) {
                        priority = -MAXPRI;
-               else
+               } else {
                        priority = thread->importance;
+               }
 
                priority += thread->task_priority;
        }
@@ -831,14 +1041,16 @@ thread_recompute_priority(
         * Note that thread->importance is user-settable to any integer
         * via THREAD_PRECEDENCE_POLICY.
         */
-       if (priority > thread->max_priority)
+       if (priority > thread->max_priority) {
                priority = thread->max_priority;
-       else if (priority < MINPRI)
+       } else if (priority < MINPRI) {
                priority = MINPRI;
+       }
 
        if (thread->saved_mode == TH_MODE_REALTIME &&
-           thread->sched_flags & TH_SFLAG_FAILSAFE)
+           thread->sched_flags & TH_SFLAG_FAILSAFE) {
                priority = DEPRESSPRI;
+       }
 
        if (thread->effective_policy.thep_terminated == TRUE) {
                /*
@@ -848,12 +1060,20 @@ thread_recompute_priority(
                 * so that the thread is no longer clamped to background
                 * during the final exit phase.
                 */
-               if (priority < thread->task_priority)
+               if (priority < thread->task_priority) {
                        priority = thread->task_priority;
-               if (priority < BASEPRI_DEFAULT)
+               }
+               if (priority < BASEPRI_DEFAULT) {
                        priority = BASEPRI_DEFAULT;
+               }
        }
 
+#if CONFIG_EMBEDDED
+       /* No one can have a base priority less than MAXPRI_THROTTLE */
+       if (priority < MAXPRI_THROTTLE) {
+               priority = MAXPRI_THROTTLE;
+       }
+#endif /* CONFIG_EMBEDDED */
 
        sched_set_thread_base_priority(thread, priority);
 }
@@ -861,10 +1081,10 @@ thread_recompute_priority(
 /* Called with the task lock held, but not the thread mutex or spinlock */
 void
 thread_policy_update_tasklocked(
-                                thread_t           thread,
-                                integer_t          priority,
-                                integer_t          max_priority,
-                                task_pend_token_t  pend_token)
+       thread_t           thread,
+       integer_t          priority,
+       integer_t          max_priority,
+       task_pend_token_t  pend_token)
 {
        thread_mtx_lock(thread);
 
@@ -882,6 +1102,23 @@ thread_policy_update_tasklocked(
        thread->task_priority = priority;
        thread->max_priority = max_priority;
 
+#if CONFIG_EMBEDDED
+       /*
+        * When backgrounding a thread, iOS has the semantic that
+        * realtime and fixed priority threads should be demoted
+        * to timeshare background threads.
+        *
+        * On OSX, realtime and fixed priority threads don't lose their mode.
+        *
+        * TODO: Do this inside the thread policy update routine in order to avoid double
+        * remove/reinsert for a runnable thread
+        */
+       if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
+               sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
+       } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
+               sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
+       }
+#endif /* CONFIG_EMBEDDED */
 
        thread_policy_update_spinlocked(thread, TRUE, pend_token);
 
@@ -899,20 +1136,22 @@ thread_policy_update_tasklocked(
  */
 void
 thread_policy_reset(
-       thread_t                thread)
+       thread_t                thread)
 {
-       spl_t           s;
+       spl_t           s;
 
        assert(thread == current_thread());
 
        s = splsched();
        thread_lock(thread);
 
-       if (thread->sched_flags & TH_SFLAG_FAILSAFE)
+       if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
                sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
+       }
 
-       if (thread->sched_flags & TH_SFLAG_THROTTLED)
+       if (thread->sched_flags & TH_SFLAG_THROTTLED) {
                sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
+       }
 
        /* At this point, the various demotions should be inactive */
        assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
@@ -937,50 +1176,51 @@ thread_policy_reset(
 
 kern_return_t
 thread_policy_get(
-       thread_t                                thread,
-       thread_policy_flavor_t  flavor,
-       thread_policy_t                 policy_info,
-       mach_msg_type_number_t  *count,
-       boolean_t                               *get_default)
+       thread_t                                thread,
+       thread_policy_flavor_t  flavor,
+       thread_policy_t                 policy_info,
+       mach_msg_type_number_t  *count,
+       boolean_t                               *get_default)
 {
-       kern_return_t                   result = KERN_SUCCESS;
+       kern_return_t                   result = KERN_SUCCESS;
 
-       if (thread == THREAD_NULL)
-               return (KERN_INVALID_ARGUMENT);
+       if (thread == THREAD_NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
 
        thread_mtx_lock(thread);
        if (!thread->active) {
                thread_mtx_unlock(thread);
 
-               return (KERN_TERMINATED);
+               return KERN_TERMINATED;
        }
 
        switch (flavor) {
-
        case THREAD_EXTENDED_POLICY:
        {
-               boolean_t               timeshare = TRUE;
+               boolean_t               timeshare = TRUE;
 
                if (!(*get_default)) {
                        spl_t s = splsched();
                        thread_lock(thread);
 
-                       if (     (thread->sched_mode != TH_MODE_REALTIME)       &&
-                                        (thread->saved_mode != TH_MODE_REALTIME)                       ) {
-                               if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
+                       if ((thread->sched_mode != TH_MODE_REALTIME) &&
+                           (thread->saved_mode != TH_MODE_REALTIME)) {
+                               if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
                                        timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
-                               else
+                               } else {
                                        timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
-                       }
-                       else
+                               }
+                       } else {
                                *get_default = TRUE;
+                       }
 
                        thread_unlock(thread);
                        splx(s);
                }
 
                if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
-                       thread_extended_policy_t        info;
+                       thread_extended_policy_t        info;
 
                        info = (thread_extended_policy_t)policy_info;
                        info->timeshare = timeshare;
@@ -991,7 +1231,7 @@ thread_policy_get(
 
        case THREAD_TIME_CONSTRAINT_POLICY:
        {
-               thread_time_constraint_policy_t         info;
+               thread_time_constraint_policy_t         info;
 
                if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
@@ -1004,15 +1244,15 @@ thread_policy_get(
                        spl_t s = splsched();
                        thread_lock(thread);
 
-                       if (    (thread->sched_mode == TH_MODE_REALTIME)        ||
-                                       (thread->saved_mode == TH_MODE_REALTIME)                ) {
+                       if ((thread->sched_mode == TH_MODE_REALTIME) ||
+                           (thread->saved_mode == TH_MODE_REALTIME)) {
                                info->period = thread->realtime.period;
                                info->computation = thread->realtime.computation;
                                info->constraint = thread->realtime.constraint;
                                info->preemptible = thread->realtime.preemptible;
-                       }
-                       else
+                       } else {
                                *get_default = TRUE;
+                       }
 
                        thread_unlock(thread);
                        splx(s);
@@ -1030,7 +1270,7 @@ thread_policy_get(
 
        case THREAD_PRECEDENCE_POLICY:
        {
-               thread_precedence_policy_t              info;
+               thread_precedence_policy_t              info;
 
                if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
@@ -1047,16 +1287,16 @@ thread_policy_get(
 
                        thread_unlock(thread);
                        splx(s);
-               }
-               else
+               } else {
                        info->importance = 0;
+               }
 
                break;
        }
 
        case THREAD_AFFINITY_POLICY:
        {
-               thread_affinity_policy_t                info;
+               thread_affinity_policy_t                info;
 
                if (!thread_affinity_is_supported()) {
                        result = KERN_NOT_SUPPORTED;
@@ -1069,17 +1309,18 @@ thread_policy_get(
 
                info = (thread_affinity_policy_t)policy_info;
 
-               if (!(*get_default))
+               if (!(*get_default)) {
                        info->affinity_tag = thread_affinity_get(thread);
-               else
+               } else {
                        info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
+               }
 
                break;
        }
 
        case THREAD_POLICY_STATE:
        {
-               thread_policy_state_t           info;
+               thread_policy_state_t           info;
 
                if (*count < THREAD_POLICY_STATE_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
@@ -1105,9 +1346,9 @@ thread_policy_get(
                        info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
                        info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
 
-                       info->thps_user_promotions          = thread->user_promotions;
+                       info->thps_user_promotions          = 0;
                        info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
-                       info->thps_ipc_overrides            = thread->ipc_overrides;
+                       info->thps_ipc_overrides            = thread->kevent_overrides;
 
                        proc_get_thread_policy_bitfield(thread, info);
 
@@ -1121,7 +1362,7 @@ thread_policy_get(
 
                break;
        }
-       
+
        case THREAD_LATENCY_QOS_POLICY:
        {
                thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
@@ -1174,7 +1415,7 @@ thread_policy_get(
                if (!(*get_default)) {
                        int relprio_value = 0;
                        info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
-                                                                      TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
+                           TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
 
                        info->tier_importance = -relprio_value;
                } else {
@@ -1192,55 +1433,16 @@ thread_policy_get(
 
        thread_mtx_unlock(thread);
 
-       return (result);
-}
-
-static volatile uint64_t unique_work_interval_id = 1; /* Start at 1, 0 is not a valid work interval ID */
-
-kern_return_t
-thread_policy_create_work_interval(
-       thread_t                thread,
-       uint64_t                *work_interval_id)
-{
-       thread_mtx_lock(thread);
-       if (thread->work_interval_id) {
-               /* already assigned a work interval ID */
-               thread_mtx_unlock(thread);
-               return (KERN_INVALID_VALUE);
-       }
-
-       thread->work_interval_id = OSIncrementAtomic64((volatile int64_t *)&unique_work_interval_id);
-       *work_interval_id = thread->work_interval_id;
-
-       thread_mtx_unlock(thread);
-       return KERN_SUCCESS;
-}
-
-kern_return_t
-thread_policy_destroy_work_interval(
-       thread_t                thread,
-       uint64_t                work_interval_id)
-{
-       thread_mtx_lock(thread);
-       if (work_interval_id == 0 || thread->work_interval_id == 0 || thread->work_interval_id != work_interval_id) {
-               /* work ID isn't valid or doesn't match previously assigned work interval ID */
-               thread_mtx_unlock(thread);
-               return (KERN_INVALID_ARGUMENT);
-       }
-
-       thread->work_interval_id = 0;
-
-       thread_mtx_unlock(thread);
-       return KERN_SUCCESS;
+       return result;
 }
 
 void
 thread_policy_create(thread_t thread)
 {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
-                                 thread_tid(thread), theffective_0(thread),
-                                 theffective_1(thread), thread->base_pri, 0);
+           (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
+           thread_tid(thread), theffective_0(thread),
+           theffective_1(thread), thread->base_pri, 0);
 
        /* We pass a pend token but ignore it */
        struct task_pend_token pend_token = {};
@@ -1248,25 +1450,25 @@ thread_policy_create(thread_t thread)
        thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
-                                 thread_tid(thread), theffective_0(thread),
-                                 theffective_1(thread), thread->base_pri, 0);
+           (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
+           thread_tid(thread), theffective_0(thread),
+           theffective_1(thread), thread->base_pri, 0);
 }
 
 static void
 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
 {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
-                                 thread_tid(thread), theffective_0(thread),
-                                 theffective_1(thread), thread->base_pri, 0);
+           (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
+           thread_tid(thread), theffective_0(thread),
+           theffective_1(thread), thread->base_pri, 0);
 
        thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
-                                 thread_tid(thread), theffective_0(thread),
-                                 theffective_1(thread), thread->base_pri, 0);
+           (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
+           thread_tid(thread), theffective_0(thread),
+           theffective_1(thread), thread->base_pri, 0);
 }
 
 
@@ -1283,7 +1485,7 @@ thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, t
  */
 static void
 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
-                                         task_pend_token_t pend_token)
+    task_pend_token_t pend_token)
 {
        /*
         * Step 1:
@@ -1307,24 +1509,22 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
        uint32_t next_qos = requested.thrp_qos;
 
        if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
-               if (requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED)
-                       next_qos = MAX(requested.thrp_qos_override, next_qos);
-
-               if (requested.thrp_qos_promote != THREAD_QOS_UNSPECIFIED)
-                       next_qos = MAX(requested.thrp_qos_promote, next_qos);
-
-               if (requested.thrp_qos_ipc_override != THREAD_QOS_UNSPECIFIED)
-                       next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
+               next_qos = MAX(requested.thrp_qos_override, next_qos);
+               next_qos = MAX(requested.thrp_qos_promote, next_qos);
+               next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
+               next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
+               next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
        }
 
        next.thep_qos = next_qos;
 
        /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
        if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
-               if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
+               if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
                        next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
-               else
+               } else {
                        next.thep_qos = task_effective.tep_qos_clamp;
+               }
        }
 
        /*
@@ -1335,10 +1535,13 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
 
        /* The ceiling only applies to threads that are in the QoS world */
        if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
-           next.thep_qos                  != THREAD_QOS_UNSPECIFIED) {
+           next.thep_qos != THREAD_QOS_UNSPECIFIED) {
                next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
        }
 
+       /* Apply the sync ipc qos override */
+       assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
+
        /*
         * The QoS relative priority is only applicable when the original programmer's
         * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
@@ -1365,30 +1568,37 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
         * but only some types of darwinbg change the sockets
         * after they're created
         */
-       if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg)
+       if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
                wants_all_sockets_bg = wants_darwinbg = TRUE;
+       }
 
-       if (requested.thrp_pidbind_bg)
+       if (requested.thrp_pidbind_bg) {
                wants_all_sockets_bg = wants_darwinbg = TRUE;
+       }
 
-       if (task_effective.tep_darwinbg)
+       if (task_effective.tep_darwinbg) {
                wants_darwinbg = TRUE;
+       }
 
        if (next.thep_qos == THREAD_QOS_BACKGROUND ||
-           next.thep_qos == THREAD_QOS_MAINTENANCE)
+           next.thep_qos == THREAD_QOS_MAINTENANCE) {
                wants_darwinbg = TRUE;
+       }
 
        /* Calculate side effects of DARWIN_BG */
 
-       if (wants_darwinbg)
+       if (wants_darwinbg) {
                next.thep_darwinbg = 1;
+       }
 
-       if (next.thep_darwinbg || task_effective.tep_new_sockets_bg)
+       if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
                next.thep_new_sockets_bg = 1;
+       }
 
        /* Don't use task_effective.tep_all_sockets_bg here */
-       if (wants_all_sockets_bg)
+       if (wants_all_sockets_bg) {
                next.thep_all_sockets_bg = 1;
+       }
 
        /* darwinbg implies background QOS (or lower) */
        if (next.thep_darwinbg &&
@@ -1402,8 +1612,9 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
        int iopol = THROTTLE_LEVEL_TIER0;
 
        /* Factor in the task's IO policy */
-       if (next.thep_darwinbg)
+       if (next.thep_darwinbg) {
                iopol = MAX(iopol, task_effective.tep_bg_iotier);
+       }
 
        iopol = MAX(iopol, task_effective.tep_io_tier);
 
@@ -1422,15 +1633,17 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
         */
        boolean_t qos_io_override_active = FALSE;
        if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
-           thread_qos_policy_params.qos_iotier[requested.thrp_qos])
+           thread_qos_policy_params.qos_iotier[requested.thrp_qos]) {
                qos_io_override_active = TRUE;
+       }
 
        /* Calculate Passive IO policy */
-       if (requested.thrp_ext_iopassive    ||
-           requested.thrp_int_iopassive    ||
-           qos_io_override_active          ||
-           task_effective.tep_io_passive   )
+       if (requested.thrp_ext_iopassive ||
+           requested.thrp_int_iopassive ||
+           qos_io_override_active ||
+           task_effective.tep_io_passive) {
                next.thep_io_passive = 1;
+       }
 
        /* Calculate timer QOS */
        uint32_t latency_qos = requested.thrp_latency_qos;
@@ -1475,20 +1688,25 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
         *  Pend updates that can't be done while holding the thread lock
         */
 
-       if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg)
+       if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
                pend_token->tpt_update_sockets = 1;
+       }
 
        /* TODO: Doesn't this only need to be done if the throttle went up? */
-       if (prev.thep_io_tier != next.thep_io_tier)
+       if (prev.thep_io_tier != next.thep_io_tier) {
                pend_token->tpt_update_throttle = 1;
+       }
 
        /*
         * Check for the attributes that sfi_thread_classify() consults,
         *  and trigger SFI re-evaluation.
         */
-       if (prev.thep_qos      != next.thep_qos         ||
-           prev.thep_darwinbg != next.thep_darwinbg    )
+       if (prev.thep_qos != next.thep_qos ||
+           prev.thep_darwinbg != next.thep_darwinbg) {
                pend_token->tpt_update_thread_sfi = 1;
+       }
+
+       integer_t old_base_pri = thread->base_pri;
 
        /*
         * Step 5:
@@ -1496,14 +1714,28 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
         */
 
        /* Check for the attributes that thread_recompute_priority() consults */
-       if (prev.thep_qos               != next.thep_qos                ||
-           prev.thep_qos_relprio       != next.thep_qos_relprio        ||
-           prev.thep_qos_ui_is_urgent  != next.thep_qos_ui_is_urgent   ||
-           prev.thep_terminated        != next.thep_terminated         ||
-           pend_token->tpt_force_recompute_pri == 1                    ||
+       if (prev.thep_qos != next.thep_qos ||
+           prev.thep_qos_relprio != next.thep_qos_relprio ||
+           prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
+           prev.thep_terminated != next.thep_terminated ||
+           pend_token->tpt_force_recompute_pri == 1 ||
            recompute_priority) {
                thread_recompute_priority(thread);
        }
+
+       /*
+        * Check if the thread is waiting on a turnstile and needs priority propagation.
+        */
+       if (pend_token->tpt_update_turnstile &&
+           ((old_base_pri == thread->base_pri) ||
+           !thread_get_waiting_turnstile(thread))) {
+               /*
+                * Reset update turnstile pend token since either
+                * the thread priority did not change or thread is
+                * not blocked on a turnstile.
+                */
+               pend_token->tpt_update_turnstile = 0;
+       }
 }
 
 
@@ -1516,16 +1748,17 @@ thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_pr
  */
 void
 proc_set_thread_policy_with_tid(task_t     task,
-                                uint64_t   tid,
-                                int        category,
-                                int        flavor,
-                                int        value)
+    uint64_t   tid,
+    int        category,
+    int        flavor,
+    int        value)
 {
        /* takes task lock, returns ref'ed thread or NULL */
        thread_t thread = task_findtid(task, tid);
 
-       if (thread == THREAD_NULL)
+       if (thread == THREAD_NULL) {
                return;
+       }
 
        proc_set_thread_policy(thread, category, flavor, value);
 
@@ -1539,9 +1772,9 @@ proc_set_thread_policy_with_tid(task_t     task,
  */
 void
 proc_set_thread_policy(thread_t   thread,
-                       int        category,
-                       int        flavor,
-                       int        value)
+    int        category,
+    int        flavor,
+    int        value)
 {
        struct task_pend_token pend_token = {};
 
@@ -1554,55 +1787,6 @@ proc_set_thread_policy(thread_t   thread,
        thread_policy_update_complete_unlocked(thread, &pend_token);
 }
 
-/*
- * KPI for pthread kext to call to set thread base QoS values during a workq wakeup
- * May be called with interrupts disabled and workqueue/waitqueue/kqueue locks held
- *
- * Does NOT do update completion, so the thread MUST be in a safe place WRT
- * IO throttling and SFI.
- *
- * TODO: Can I assert 'it must be in a safe place'?
- */
-kern_return_t
-thread_set_workq_qos(thread_t   thread,
-                     int        qos_tier,
-                     int        relprio) /* relprio is -16 to 0 */
-{
-       assert(qos_tier >= 0 && qos_tier <= THREAD_QOS_LAST);
-       assert(relprio  <= 0 && relprio  >= THREAD_QOS_MIN_TIER_IMPORTANCE);
-
-       if (!(qos_tier >= 0 && qos_tier <= THREAD_QOS_LAST))
-               return KERN_FAILURE;
-       if (!(relprio  <= 0 && relprio  >= THREAD_QOS_MIN_TIER_IMPORTANCE))
-               return KERN_FAILURE;
-
-       if (qos_tier == THREAD_QOS_UNSPECIFIED) {
-               assert(relprio == 0);
-               if (relprio != 0)
-                       return KERN_FAILURE;
-       }
-
-       assert(thread->static_param);
-       if (!thread->static_param) {
-               return KERN_FAILURE;
-       }
-
-       /* Concern: this doesn't hold the mutex... */
-       //if (!thread->active)
-       //      return KERN_TERMINATED;
-
-       struct task_pend_token pend_token = {};
-
-       proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, qos_tier, -relprio, &pend_token);
-
-       assert(pend_token.tpt_update_sockets == 0);
-       /* we don't need to update throttle or sfi because pthread kext promises the thread is in a safe place */
-       /* TODO: Do we need to update SFI to ensure it gets tagged with the AST? */
-
-       return KERN_SUCCESS;
-}
-
-
 /*
  * Do the things that can't be done while holding a thread mutex.
  * These are set up to call back into thread policy to get the latest value,
@@ -1618,15 +1802,22 @@ void
 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
 {
 #ifdef MACH_BSD
-       if (pend_token->tpt_update_sockets)
+       if (pend_token->tpt_update_sockets) {
                proc_apply_task_networkbg(thread->task->bsd_info, thread);
+       }
 #endif /* MACH_BSD */
 
-       if (pend_token->tpt_update_throttle)
+       if (pend_token->tpt_update_throttle) {
                rethrottle_thread(thread->uthread);
+       }
 
-       if (pend_token->tpt_update_thread_sfi)
+       if (pend_token->tpt_update_thread_sfi) {
                sfi_reevaluate(thread);
+       }
+
+       if (pend_token->tpt_update_turnstile) {
+               turnstile_update_thread_priority_chain(thread);
+       }
 }
 
 /*
@@ -1635,11 +1826,11 @@ thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_t
  */
 static void
 proc_set_thread_policy_locked(thread_t          thread,
-                              int               category,
-                              int               flavor,
-                              int               value,
-                              int               value2,
-                              task_pend_token_t pend_token)
+    int               category,
+    int               flavor,
+    int               value,
+    int               value2,
+    task_pend_token_t pend_token)
 {
        spl_t s = splsched();
        thread_lock(thread);
@@ -1656,25 +1847,25 @@ proc_set_thread_policy_locked(thread_t          thread,
  */
 static void
 proc_set_thread_policy_spinlocked(thread_t          thread,
-                                  int               category,
-                                  int               flavor,
-                                  int               value,
-                                  int               value2,
-                                  task_pend_token_t pend_token)
+    int               category,
+    int               flavor,
+    int               value,
+    int               value2,
+    task_pend_token_t pend_token)
 {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
-                                 thread_tid(thread), threquested_0(thread),
-                                 threquested_1(thread), value, 0);
+           (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
+           thread_tid(thread), threquested_0(thread),
+           threquested_1(thread), value, 0);
 
-       thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);
+       thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
 
        thread_policy_update_spinlocked(thread, FALSE, pend_token);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
-                                 thread_tid(thread), threquested_0(thread),
-                                 threquested_1(thread), tpending(pend_token), 0);
+           (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
+           thread_tid(thread), threquested_0(thread),
+           threquested_1(thread), tpending(pend_token), 0);
 }
 
 /*
@@ -1682,103 +1873,116 @@ proc_set_thread_policy_spinlocked(thread_t          thread,
  */
 static void
 thread_set_requested_policy_spinlocked(thread_t     thread,
-                                       int          category,
-                                       int          flavor,
-                                       int          value,
-                                       int          value2)
+    int               category,
+    int               flavor,
+    int               value,
+    int               value2,
+    task_pend_token_t pend_token)
 {
        int tier, passive;
 
        struct thread_requested_policy requested = thread->requested_policy;
 
        switch (flavor) {
-
        /* Category: EXTERNAL and INTERNAL, thread and task */
 
-               case TASK_POLICY_DARWIN_BG:
-                       if (category == TASK_POLICY_EXTERNAL)
-                               requested.thrp_ext_darwinbg = value;
-                       else
-                               requested.thrp_int_darwinbg = value;
-                       break;
+       case TASK_POLICY_DARWIN_BG:
+               if (category == TASK_POLICY_EXTERNAL) {
+                       requested.thrp_ext_darwinbg = value;
+               } else {
+                       requested.thrp_int_darwinbg = value;
+               }
+               break;
 
-               case TASK_POLICY_IOPOL:
-                       proc_iopol_to_tier(value, &tier, &passive);
-                       if (category == TASK_POLICY_EXTERNAL) {
-                               requested.thrp_ext_iotier  = tier;
-                               requested.thrp_ext_iopassive = passive;
-                       } else {
-                               requested.thrp_int_iotier  = tier;
-                               requested.thrp_int_iopassive = passive;
-                       }
-                       break;
+       case TASK_POLICY_IOPOL:
+               proc_iopol_to_tier(value, &tier, &passive);
+               if (category == TASK_POLICY_EXTERNAL) {
+                       requested.thrp_ext_iotier  = tier;
+                       requested.thrp_ext_iopassive = passive;
+               } else {
+                       requested.thrp_int_iotier  = tier;
+                       requested.thrp_int_iopassive = passive;
+               }
+               break;
 
-               case TASK_POLICY_IO:
-                       if (category == TASK_POLICY_EXTERNAL)
-                               requested.thrp_ext_iotier = value;
-                       else
-                               requested.thrp_int_iotier = value;
-                       break;
+       case TASK_POLICY_IO:
+               if (category == TASK_POLICY_EXTERNAL) {
+                       requested.thrp_ext_iotier = value;
+               } else {
+                       requested.thrp_int_iotier = value;
+               }
+               break;
 
-               case TASK_POLICY_PASSIVE_IO:
-                       if (category == TASK_POLICY_EXTERNAL)
-                               requested.thrp_ext_iopassive = value;
-                       else
-                               requested.thrp_int_iopassive = value;
-                       break;
+       case TASK_POLICY_PASSIVE_IO:
+               if (category == TASK_POLICY_EXTERNAL) {
+                       requested.thrp_ext_iopassive = value;
+               } else {
+                       requested.thrp_int_iopassive = value;
+               }
+               break;
 
        /* Category: ATTRIBUTE, thread only */
 
-               case TASK_POLICY_PIDBIND_BG:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_pidbind_bg = value;
-                       break;
+       case TASK_POLICY_PIDBIND_BG:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_pidbind_bg = value;
+               break;
 
-               case TASK_POLICY_LATENCY_QOS:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_latency_qos = value;
-                       break;
+       case TASK_POLICY_LATENCY_QOS:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_latency_qos = value;
+               break;
 
-               case TASK_POLICY_THROUGH_QOS:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_through_qos = value;
-                       break;
+       case TASK_POLICY_THROUGH_QOS:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_through_qos = value;
+               break;
 
-               case TASK_POLICY_QOS:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_qos = value;
-                       break;
+       case TASK_POLICY_QOS_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_qos_override = value;
+               pend_token->tpt_update_turnstile = 1;
+               break;
 
-               case TASK_POLICY_QOS_OVERRIDE:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_qos_override = value;
-                       break;
+       case TASK_POLICY_QOS_AND_RELPRIO:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_qos = value;
+               requested.thrp_qos_relprio = value2;
+               pend_token->tpt_update_turnstile = 1;
+               DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
+               break;
 
-               case TASK_POLICY_QOS_AND_RELPRIO:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_qos = value;
-                       requested.thrp_qos_relprio = value2;
-                       DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
-                       break;
+       case TASK_POLICY_QOS_WORKQ_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_qos_workq_override = value;
+               pend_token->tpt_update_turnstile = 1;
+               break;
 
-               case TASK_POLICY_QOS_PROMOTE:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_qos_promote = value;
-                       break;
+       case TASK_POLICY_QOS_PROMOTE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_qos_promote = value;
+               break;
 
-               case TASK_POLICY_QOS_IPC_OVERRIDE:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_qos_ipc_override = value;
-                       break;
+       case TASK_POLICY_QOS_KEVENT_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_qos_kevent_override = value;
+               pend_token->tpt_update_turnstile = 1;
+               break;
 
-               case TASK_POLICY_TERMINATED:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       requested.thrp_terminated = value;
-                       break;
+       case TASK_POLICY_QOS_SERVICER_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_qos_wlsvc_override = value;
+               pend_token->tpt_update_turnstile = 1;
+               break;
 
-               default:
-                       panic("unknown task policy: %d %d %d", category, flavor, value);
-                       break;
+       case TASK_POLICY_TERMINATED:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               requested.thrp_terminated = value;
+               break;
+
+       default:
+               panic("unknown task policy: %d %d %d", category, flavor, value);
+               break;
        }
 
        thread->requested_policy = requested;
@@ -1790,8 +1994,8 @@ thread_set_requested_policy_spinlocked(thread_t     thread,
  */
 int
 proc_get_thread_policy(thread_t   thread,
-                       int        category,
-                       int        flavor)
+    int        category,
+    int        flavor)
 {
        int value = 0;
        thread_mtx_lock(thread);
@@ -1802,9 +2006,9 @@ proc_get_thread_policy(thread_t   thread,
 
 static int
 proc_get_thread_policy_locked(thread_t   thread,
-                              int        category,
-                              int        flavor,
-                              int*       value2)
+    int        category,
+    int        flavor,
+    int*       value2)
 {
        int value = 0;
 
@@ -1824,79 +2028,91 @@ proc_get_thread_policy_locked(thread_t   thread,
  */
 static int
 thread_get_requested_policy_spinlocked(thread_t thread,
-                                       int      category,
-                                       int      flavor,
-                                       int*     value2)
+    int      category,
+    int      flavor,
+    int*     value2)
 {
        int value = 0;
 
        struct thread_requested_policy requested = thread->requested_policy;
 
        switch (flavor) {
-               case TASK_POLICY_DARWIN_BG:
-                       if (category == TASK_POLICY_EXTERNAL)
-                               value = requested.thrp_ext_darwinbg;
-                       else
-                               value = requested.thrp_int_darwinbg;
-                       break;
-               case TASK_POLICY_IOPOL:
-                       if (category == TASK_POLICY_EXTERNAL)
-                               value = proc_tier_to_iopol(requested.thrp_ext_iotier,
-                                                          requested.thrp_ext_iopassive);
-                       else
-                               value = proc_tier_to_iopol(requested.thrp_int_iotier,
-                                                          requested.thrp_int_iopassive);
-                       break;
-               case TASK_POLICY_IO:
-                       if (category == TASK_POLICY_EXTERNAL)
-                               value = requested.thrp_ext_iotier;
-                       else
-                               value = requested.thrp_int_iotier;
-                       break;
-               case TASK_POLICY_PASSIVE_IO:
-                       if (category == TASK_POLICY_EXTERNAL)
-                               value = requested.thrp_ext_iopassive;
-                       else
-                               value = requested.thrp_int_iopassive;
-                       break;
-               case TASK_POLICY_QOS:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       value = requested.thrp_qos;
-                       break;
-               case TASK_POLICY_QOS_OVERRIDE:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       value = requested.thrp_qos_override;
-                       break;
-               case TASK_POLICY_LATENCY_QOS:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       value = requested.thrp_latency_qos;
-                       break;
-               case TASK_POLICY_THROUGH_QOS:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       value = requested.thrp_through_qos;
-                       break;
-               case TASK_POLICY_QOS_AND_RELPRIO:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       assert(value2 != NULL);
-                       value = requested.thrp_qos;
-                       *value2 = requested.thrp_qos_relprio;
-                       break;
-               case TASK_POLICY_QOS_PROMOTE:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       value = requested.thrp_qos_promote;
-                       break;
-               case TASK_POLICY_QOS_IPC_OVERRIDE:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       value = requested.thrp_qos_ipc_override;
-                       break;
-               case TASK_POLICY_TERMINATED:
-                       assert(category == TASK_POLICY_ATTRIBUTE);
-                       value = requested.thrp_terminated;
-                       break;
+       case TASK_POLICY_DARWIN_BG:
+               if (category == TASK_POLICY_EXTERNAL) {
+                       value = requested.thrp_ext_darwinbg;
+               } else {
+                       value = requested.thrp_int_darwinbg;
+               }
+               break;
+       case TASK_POLICY_IOPOL:
+               if (category == TASK_POLICY_EXTERNAL) {
+                       value = proc_tier_to_iopol(requested.thrp_ext_iotier,
+                           requested.thrp_ext_iopassive);
+               } else {
+                       value = proc_tier_to_iopol(requested.thrp_int_iotier,
+                           requested.thrp_int_iopassive);
+               }
+               break;
+       case TASK_POLICY_IO:
+               if (category == TASK_POLICY_EXTERNAL) {
+                       value = requested.thrp_ext_iotier;
+               } else {
+                       value = requested.thrp_int_iotier;
+               }
+               break;
+       case TASK_POLICY_PASSIVE_IO:
+               if (category == TASK_POLICY_EXTERNAL) {
+                       value = requested.thrp_ext_iopassive;
+               } else {
+                       value = requested.thrp_int_iopassive;
+               }
+               break;
+       case TASK_POLICY_QOS:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_qos;
+               break;
+       case TASK_POLICY_QOS_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_qos_override;
+               break;
+       case TASK_POLICY_LATENCY_QOS:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_latency_qos;
+               break;
+       case TASK_POLICY_THROUGH_QOS:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_through_qos;
+               break;
+       case TASK_POLICY_QOS_WORKQ_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_qos_workq_override;
+               break;
+       case TASK_POLICY_QOS_AND_RELPRIO:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               assert(value2 != NULL);
+               value = requested.thrp_qos;
+               *value2 = requested.thrp_qos_relprio;
+               break;
+       case TASK_POLICY_QOS_PROMOTE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_qos_promote;
+               break;
+       case TASK_POLICY_QOS_KEVENT_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_qos_kevent_override;
+               break;
+       case TASK_POLICY_QOS_SERVICER_OVERRIDE:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_qos_wlsvc_override;
+               break;
+       case TASK_POLICY_TERMINATED:
+               assert(category == TASK_POLICY_ATTRIBUTE);
+               value = requested.thrp_terminated;
+               break;
 
-               default:
-                       panic("unknown policy_flavor %d", flavor);
-                       break;
+       default:
+               panic("unknown policy_flavor %d", flavor);
+               break;
        }
 
        return value;
@@ -1916,90 +2132,92 @@ thread_get_requested_policy_spinlocked(thread_t thread,
  */
 int
 proc_get_effective_thread_policy(thread_t thread,
-                                 int      flavor)
+    int      flavor)
 {
        int value = 0;
 
        switch (flavor) {
-               case TASK_POLICY_DARWIN_BG:
-                       /*
-                        * This call is used within the timer layer, as well as
-                        * prioritizing requests to the graphics system.
-                        * It also informs SFI and originator-bg-state.
-                        * Returns 1 for background mode, 0 for normal mode
-                        */
-
-                       value = thread->effective_policy.thep_darwinbg ? 1 : 0;
-                       break;
-               case TASK_POLICY_IO:
-                       /*
-                        * The I/O system calls here to find out what throttling tier to apply to an operation.
-                        * Returns THROTTLE_LEVEL_* values
-                        */
-                       value = thread->effective_policy.thep_io_tier;
-                       if (thread->iotier_override != THROTTLE_LEVEL_NONE)
-                               value = MIN(value, thread->iotier_override);
-                       break;
-               case TASK_POLICY_PASSIVE_IO:
-                       /*
-                        * The I/O system calls here to find out whether an operation should be passive.
-                        * (i.e. not cause operations with lower throttle tiers to be throttled)
-                        * Returns 1 for passive mode, 0 for normal mode
-                        *
-                        * If an override is causing IO to go into a lower tier, we also set
-                        * the passive bit so that a thread doesn't end up stuck in its own throttle
-                        * window when the override goes away.
-                        */
-                       value = thread->effective_policy.thep_io_passive ? 1 : 0;
-                       if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
-                           thread->iotier_override < thread->effective_policy.thep_io_tier)
-                               value = 1;
-                       break;
-               case TASK_POLICY_ALL_SOCKETS_BG:
-                       /*
-                        * do_background_socket() calls this to determine whether
-                        * it should change the thread's sockets
-                        * Returns 1 for background mode, 0 for normal mode
-                        * This consults both thread and task so un-DBGing a thread while the task is BG
-                        * doesn't get you out of the network throttle.
-                        */
-                       value = (thread->effective_policy.thep_all_sockets_bg ||
-                                thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
-                       break;
-               case TASK_POLICY_NEW_SOCKETS_BG:
-                       /*
-                        * socreate() calls this to determine if it should mark a new socket as background
-                        * Returns 1 for background mode, 0 for normal mode
-                        */
-                       value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
-                       break;
-               case TASK_POLICY_LATENCY_QOS:
-                       /*
-                        * timer arming calls into here to find out the timer coalescing level
-                        * Returns a latency QoS tier (0-6)
-                        */
-                       value = thread->effective_policy.thep_latency_qos;
-                       break;
-               case TASK_POLICY_THROUGH_QOS:
-                       /*
-                        * This value is passed into the urgency callout from the scheduler
-                        * to the performance management subsystem.
-                        *
-                        * Returns a throughput QoS tier (0-6)
-                        */
-                       value = thread->effective_policy.thep_through_qos;
-                       break;
-               case TASK_POLICY_QOS:
-                       /*
-                        * This is communicated to the performance management layer and SFI.
-                        *
-                        * Returns a QoS policy tier
-                        */
-                       value = thread->effective_policy.thep_qos;
-                       break;
-               default:
-                       panic("unknown thread policy flavor %d", flavor);
-                       break;
+       case TASK_POLICY_DARWIN_BG:
+               /*
+                * This call is used within the timer layer, as well as
+                * prioritizing requests to the graphics system.
+                * It also informs SFI and originator-bg-state.
+                * Returns 1 for background mode, 0 for normal mode
+                */
+
+               value = thread->effective_policy.thep_darwinbg ? 1 : 0;
+               break;
+       case TASK_POLICY_IO:
+               /*
+                * The I/O system calls here to find out what throttling tier to apply to an operation.
+                * Returns THROTTLE_LEVEL_* values
+                */
+               value = thread->effective_policy.thep_io_tier;
+               if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
+                       value = MIN(value, thread->iotier_override);
+               }
+               break;
+       case TASK_POLICY_PASSIVE_IO:
+               /*
+                * The I/O system calls here to find out whether an operation should be passive.
+                * (i.e. not cause operations with lower throttle tiers to be throttled)
+                * Returns 1 for passive mode, 0 for normal mode
+                *
+                * If an override is causing IO to go into a lower tier, we also set
+                * the passive bit so that a thread doesn't end up stuck in its own throttle
+                * window when the override goes away.
+                */
+               value = thread->effective_policy.thep_io_passive ? 1 : 0;
+               if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
+                   thread->iotier_override < thread->effective_policy.thep_io_tier) {
+                       value = 1;
+               }
+               break;
+       case TASK_POLICY_ALL_SOCKETS_BG:
+               /*
+                * do_background_socket() calls this to determine whether
+                * it should change the thread's sockets
+                * Returns 1 for background mode, 0 for normal mode
+                * This consults both thread and task so un-DBGing a thread while the task is BG
+                * doesn't get you out of the network throttle.
+                */
+               value = (thread->effective_policy.thep_all_sockets_bg ||
+                   thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
+               break;
+       case TASK_POLICY_NEW_SOCKETS_BG:
+               /*
+                * socreate() calls this to determine if it should mark a new socket as background
+                * Returns 1 for background mode, 0 for normal mode
+                */
+               value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
+               break;
+       case TASK_POLICY_LATENCY_QOS:
+               /*
+                * timer arming calls into here to find out the timer coalescing level
+                * Returns a latency QoS tier (0-6)
+                */
+               value = thread->effective_policy.thep_latency_qos;
+               break;
+       case TASK_POLICY_THROUGH_QOS:
+               /*
+                * This value is passed into the urgency callout from the scheduler
+                * to the performance management subsystem.
+                *
+                * Returns a throughput QoS tier (0-6)
+                */
+               value = thread->effective_policy.thep_through_qos;
+               break;
+       case TASK_POLICY_QOS:
+               /*
+                * This is communicated to the performance management layer and SFI.
+                *
+                * Returns a QoS policy tier
+                */
+               value = thread->effective_policy.thep_qos;
+               break;
+       default:
+               panic("unknown thread policy flavor %d", flavor);
+               break;
        }
 
        return value;
@@ -2122,7 +2340,8 @@ theffective_1(thread_t thread)
  * However a thread reference must be held on the thread.
  */
 
-void set_thread_iotier_override(thread_t thread, int policy)
+void
+set_thread_iotier_override(thread_t thread, int policy)
 {
        int current_override;
 
@@ -2130,8 +2349,9 @@ void set_thread_iotier_override(thread_t thread, int policy)
        do {
                current_override = thread->iotier_override;
 
-               if (current_override != THROTTLE_LEVEL_NONE)
+               if (current_override != THROTTLE_LEVEL_NONE) {
                        policy = MIN(current_override, policy);
+               }
 
                if (current_override == policy) {
                        /* no effective change */
@@ -2163,18 +2383,15 @@ void set_thread_iotier_override(thread_t thread, int policy)
  * to be handled specially in the future, but for now it's fine to slam
  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
  */
-static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
+static void
+canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
+{
        if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
                /* Map all input resource/type to a single one */
                *resource = USER_ADDR_NULL;
                *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
        } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
                /* no transform */
-       } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) {
-               /* Map all dispatch overrides to a single one, to avoid memory overhead */
-               if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
-                       *resource = USER_ADDR_NULL;
-               }
        } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
                /* Map all mutex overrides to a single one, to avoid memory overhead */
                if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
@@ -2186,8 +2403,8 @@ static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_
 /* This helper routine finds an existing override if known. Locking should be done by caller */
 static struct thread_qos_override *
 find_qos_override(thread_t thread,
-                  user_addr_t resource,
-                  int resource_type)
+    user_addr_t resource,
+    int resource_type)
 {
        struct thread_qos_override *override;
 
@@ -2206,10 +2423,10 @@ find_qos_override(thread_t thread,
 
 static void
 find_and_decrement_qos_override(thread_t       thread,
-                                user_addr_t    resource,
-                                int            resource_type,
-                                boolean_t      reset,
-                                struct thread_qos_override **free_override_list)
+    user_addr_t    resource,
+    int            resource_type,
+    boolean_t      reset,
+    struct thread_qos_override **free_override_list)
 {
        struct thread_qos_override *override, *override_prev;
 
@@ -2218,9 +2435,8 @@ find_and_decrement_qos_override(thread_t       thread,
        while (override) {
                struct thread_qos_override *override_next = override->override_next;
 
-               if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource  || override->override_resource == resource) &&
+               if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
                    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
-
                        if (reset) {
                                override->override_contended_resource_count = 0;
                        } else {
@@ -2266,11 +2482,7 @@ calculate_requested_qos_override(thread_t thread)
 
        override = thread->overrides;
        while (override) {
-               if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH ||
-                       override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
-                       qos_override = MAX(qos_override, override->override_qos);
-               }
-
+               qos_override = MAX(qos_override, override->override_qos);
                override = override->override_next;
        }
 
@@ -2281,19 +2493,13 @@ calculate_requested_qos_override(thread_t thread)
  * Returns:
  * - 0 on success
  * - EINVAL if some invalid input was passed
- * - EFAULT if user_lock_addr != NULL and needs to be faulted (userland has to
- *   fault and retry)
- * - ESTALE if user_lock_addr != NULL &&
- *   ulock_owner_value_to_port_name(*user_lock_addr) != user_lock_owner
  */
 static int
 proc_thread_qos_add_override_internal(thread_t         thread,
-                                      int              override_qos,
-                                      boolean_t        first_override_for_resource,
-                                      user_addr_t      resource,
-                                      int              resource_type,
-                                      user_addr_t      user_lock_addr,
-                                      mach_port_name_t user_lock_owner)
+    int              override_qos,
+    boolean_t        first_override_for_resource,
+    user_addr_t      resource,
+    int              resource_type)
 {
        struct task_pend_token pend_token = {};
        int rc = 0;
@@ -2301,12 +2507,12 @@ proc_thread_qos_add_override_internal(thread_t         thread,
        thread_mtx_lock(thread);
 
        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
-                                                 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
+           thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
 
        DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
-                       uint64_t, thread->requested_policy.thrp_qos,
-                       uint64_t, thread->effective_policy.thep_qos,
-                       int, override_qos, boolean_t, first_override_for_resource);
+           uint64_t, thread->requested_policy.thrp_qos,
+           uint64_t, thread->effective_policy.thep_qos,
+           int, override_qos, boolean_t, first_override_for_resource);
 
        struct thread_qos_override *override;
        struct thread_qos_override *override_new = NULL;
@@ -2325,26 +2531,6 @@ proc_thread_qos_add_override_internal(thread_t         thread,
                thread_mtx_lock(thread);
                override = find_qos_override(thread, resource, resource_type);
        }
-       if (user_lock_addr) {
-               uint64_t val;
-               /* Workaround lack of explicit support for 'no-fault copyin'
-                * <rdar://problem/24999882>, as disabling preemption prevents paging in
-                */
-               disable_preemption();
-               rc = copyin_word(user_lock_addr, &val, sizeof(user_lock_owner));
-               enable_preemption();
-               if (rc == 0 && ulock_owner_value_to_port_name((uint32_t)val) != user_lock_owner) {
-                       rc = ESTALE;
-               }
-               if (rc) {
-                       prev_qos_override = proc_get_thread_policy_locked(thread,
-                                       TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
-                       new_qos_override = prev_qos_override;
-                       new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
-                       thread_mtx_unlock(thread);
-                       goto out;
-               }
-       }
        if (first_override_for_resource && override) {
                /* Someone else already allocated while the thread lock was dropped */
                override->override_contended_resource_count++;
@@ -2361,10 +2547,11 @@ proc_thread_qos_add_override_internal(thread_t         thread,
        }
 
        if (override) {
-               if (override->override_qos == THREAD_QOS_UNSPECIFIED)
+               if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
                        override->override_qos = override_qos;
-               else
+               } else {
                        override->override_qos = MAX(override->override_qos, override_qos);
+               }
        }
 
        /* Determine how to combine the various overrides into a single current
@@ -2373,12 +2560,12 @@ proc_thread_qos_add_override_internal(thread_t         thread,
        new_qos_override = calculate_requested_qos_override(thread);
 
        prev_qos_override = proc_get_thread_policy_locked(thread,
-                       TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
+           TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
 
        if (new_qos_override != prev_qos_override) {
                proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
-                                             TASK_POLICY_QOS_OVERRIDE,
-                                             new_qos_override, 0, &pend_token);
+                   TASK_POLICY_QOS_OVERRIDE,
+                   new_qos_override, 0, &pend_token);
        }
 
        new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
@@ -2387,42 +2574,27 @@ proc_thread_qos_add_override_internal(thread_t         thread,
 
        thread_policy_update_complete_unlocked(thread, &pend_token);
 
-out:
        if (override_new) {
                zfree(thread_qos_override_zone, override_new);
        }
 
        DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
-                     int, new_qos_override, int, new_effective_qos, int, rc);
+           int, new_qos_override, int, new_effective_qos, int, rc);
 
        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
-                                                 new_qos_override, resource, resource_type, 0, 0);
+           new_qos_override, resource, resource_type, 0, 0);
 
        return rc;
 }
 
 int
-proc_thread_qos_add_override_check_owner(thread_t thread,
-                                         int override_qos,
-                                         boolean_t first_override_for_resource,
-                                         user_addr_t resource,
-                                         int resource_type,
-                                         user_addr_t user_lock_addr,
-                                         mach_port_name_t user_lock_owner)
-{
-       return proc_thread_qos_add_override_internal(thread, override_qos,
-                       first_override_for_resource, resource, resource_type,
-                       user_lock_addr, user_lock_owner);
-}
-
-boolean_t
 proc_thread_qos_add_override(task_t           task,
-                             thread_t         thread,
-                             uint64_t         tid,
-                             int              override_qos,
-                             boolean_t        first_override_for_resource,
-                             user_addr_t      resource,
-                             int              resource_type)
+    thread_t         thread,
+    uint64_t         tid,
+    int              override_qos,
+    boolean_t        first_override_for_resource,
+    user_addr_t      resource,
+    int              resource_type)
 {
        boolean_t has_thread_reference = FALSE;
        int rc = 0;
@@ -2433,34 +2605,32 @@ proc_thread_qos_add_override(task_t           task,
 
                if (thread == THREAD_NULL) {
                        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
-                                                                 tid, 0, 0xdead, 0, 0);
-                       return FALSE;
+                           tid, 0, 0xdead, 0, 0);
+                       return ESRCH;
                }
                has_thread_reference = TRUE;
        } else {
                assert(thread->task == task);
        }
        rc = proc_thread_qos_add_override_internal(thread, override_qos,
-                       first_override_for_resource, resource, resource_type, 0, 0);
+           first_override_for_resource, resource, resource_type);
        if (has_thread_reference) {
                thread_deallocate(thread);
        }
 
-       return rc == 0;
+       return rc;
 }
 
-static int
+static void
 proc_thread_qos_remove_override_internal(thread_t       thread,
-                                         user_addr_t    resource,
-                                         int            resource_type,
-                                         boolean_t      reset,
-                                         boolean_t      squash)
+    user_addr_t    resource,
+    int            resource_type,
+    boolean_t      reset)
 {
        struct task_pend_token pend_token = {};
 
        struct thread_qos_override *deferred_free_override_list = NULL;
-       int new_qos_override, prev_qos_override, new_effective_qos, prev_qos;
-       int new_qos = THREAD_QOS_UNSPECIFIED;
+       int new_qos_override, prev_qos_override, new_effective_qos;
 
        thread_mtx_lock(thread);
 
@@ -2469,11 +2639,11 @@ proc_thread_qos_remove_override_internal(thread_t       thread,
        find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
 
        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
-                             thread_tid(thread), resource, reset, 0, 0);
+           thread_tid(thread), resource, reset, 0, 0);
 
        DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
-                       uint64_t, thread->requested_policy.thrp_qos,
-                       uint64_t, thread->effective_policy.thep_qos);
+           uint64_t, thread->requested_policy.thrp_qos,
+           uint64_t, thread->effective_policy.thep_qos);
 
        /* Determine how to combine the various overrides into a single current requested override */
        new_qos_override = calculate_requested_qos_override(thread);
@@ -2488,20 +2658,9 @@ proc_thread_qos_remove_override_internal(thread_t       thread,
         */
        prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
 
-       if (squash) {
-               /*
-                * Remove the specified overrides, and set the current override as the new base QoS.
-                * Return the new QoS value.
-                */
-               prev_qos = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, NULL);
-
-               new_qos = MAX(prev_qos, prev_qos_override);
-               if (new_qos != prev_qos)
-                       proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, new_qos, 0, &pend_token);
-       }
-
-       if (new_qos_override != prev_qos_override)
+       if (new_qos_override != prev_qos_override) {
                proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
+       }
 
        new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
 
@@ -2520,20 +2679,18 @@ proc_thread_qos_remove_override_internal(thread_t       thread,
        }
 
        DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
-                     int, new_qos_override, int, new_effective_qos);
+           int, new_qos_override, int, new_effective_qos);
 
        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
-                             thread_tid(thread), squash, 0, 0, 0);
-
-       return new_qos;
+           thread_tid(thread), 0, 0, 0, 0);
 }
 
-boolean_t
+int
 proc_thread_qos_remove_override(task_t      task,
-                                thread_t    thread,
-                                uint64_t    tid,
-                                user_addr_t resource,
-                                int         resource_type)
+    thread_t    thread,
+    uint64_t    tid,
+    user_addr_t resource,
+    int         resource_type)
 {
        boolean_t has_thread_reference = FALSE;
 
@@ -2543,84 +2700,31 @@ proc_thread_qos_remove_override(task_t      task,
 
                if (thread == THREAD_NULL) {
                        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
-                                             tid, 0, 0xdead, 0, 0);
-                       return FALSE;
+                           tid, 0, 0xdead, 0, 0);
+                       return ESRCH;
                }
                has_thread_reference = TRUE;
        } else {
                assert(task == thread->task);
        }
 
-       proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE, FALSE);
+       proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
 
-       if (has_thread_reference)
+       if (has_thread_reference) {
                thread_deallocate(thread);
-
-       return TRUE;
-}
-
-boolean_t
-proc_thread_qos_reset_override(task_t       task,
-                               thread_t     thread,
-                               uint64_t     tid,
-                               user_addr_t  resource,
-                               int          resource_type)
-
-{
-       boolean_t has_thread_reference = FALSE;
-
-       if (thread == THREAD_NULL) {
-               thread = task_findtid(task, tid);
-               /* returns referenced thread */
-
-               if (thread == THREAD_NULL) {
-                       KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
-                                             tid, 0, 0xdead, 0, 0);
-                       return FALSE;
-               }
-               has_thread_reference = TRUE;
-       } else {
-               assert(task == thread->task);
        }
 
-       proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, FALSE);
-
-       if (has_thread_reference)
-               thread_deallocate(thread);
-
-       return TRUE;
-}
-
-/*
- * Clears the requested overrides, and replaces the current QoS with the max
- * of the current QoS and the current override, then returns the new QoS.
- *
- * This is useful in order to reset overrides before parking a workqueue thread,
- * but avoid dropping priority and getting preempted right before parking.
- *
- * Called without any locks held.
- */
-int
-proc_thread_qos_squash_override(thread_t thread, user_addr_t resource, int resource_type)
-{
-       return proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, TRUE);
+       return 0;
 }
 
 /* Deallocate before thread termination */
-void proc_thread_qos_deallocate(thread_t thread)
+void
+proc_thread_qos_deallocate(thread_t thread)
 {
-       /*
-        * There are no more references to this thread,
-        * therefore this thread must not own any more locks,
-        * therefore there must not be any more user promotions.
-        */
-       assert(thread->user_promotions == 0);
-       assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
-       assert(thread->user_promotion_basepri == 0);
-
        /* This thread must have no more IPC overrides. */
-       assert(thread->ipc_overrides == 0);
-       assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
+       assert(thread->kevent_overrides == 0);
+       assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
+       assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
 
        /*
         * Clear out any lingering override objects.
@@ -2646,7 +2750,8 @@ void proc_thread_qos_deallocate(thread_t thread)
  * Set up the primordial thread's QoS
  */
 void
-task_set_main_thread_qos(task_t task, thread_t thread) {
+task_set_main_thread_qos(task_t task, thread_t thread)
+{
        struct task_pend_token pend_token = {};
 
        assert(thread->task == task);
@@ -2654,23 +2759,23 @@ task_set_main_thread_qos(task_t task, thread_t thread) {
        thread_mtx_lock(thread);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
-                                 thread_tid(thread), threquested_0(thread), threquested_1(thread),
-                                 thread->requested_policy.thrp_qos, 0);
+           (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
+           thread_tid(thread), threquested_0(thread), threquested_1(thread),
+           thread->requested_policy.thrp_qos, 0);
 
        int primordial_qos = task_compute_main_thread_qos(task);
 
-       proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
-                                     primordial_qos, 0, &pend_token);
+       proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
+           primordial_qos, 0, &pend_token);
 
        thread_mtx_unlock(thread);
 
        thread_policy_update_complete_unlocked(thread, &pend_token);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
-                                 thread_tid(thread), threquested_0(thread), threquested_1(thread),
-                                 primordial_qos, 0);
+           (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
+           thread_tid(thread), threquested_0(thread), threquested_1(thread),
+           primordial_qos, 0);
 }
 
 /*
@@ -2684,96 +2789,185 @@ task_get_default_manager_qos(task_t task)
 {
        int primordial_qos = task_compute_main_thread_qos(task);
 
-       if (primordial_qos == THREAD_QOS_LEGACY)
+       if (primordial_qos == THREAD_QOS_LEGACY) {
                primordial_qos = THREAD_QOS_USER_INITIATED;
+       }
 
        return primordial_qos;
 }
 
-
 /*
- * Promote thread with the user level properties of 'promoter'
- * Mutexes may be held, but it's OK to take the throttle lock
+ * Check if the kernel promotion on thread has changed
+ * and apply it.
  *
- * if 'new_promotion' is TRUE, this is a new promotion.
- * if FALSE, we are updating an existing promotion.
+ * thread locked on entry and exit
  */
-static void
-thread_user_promotion_promote(thread_t  thread,
-                              thread_t  promoter,
-                              struct promote_token* promote_token,
-                              boolean_t new_promotion)
+boolean_t
+thread_recompute_kernel_promotion_locked(thread_t thread)
 {
-       struct task_pend_token pend_token = {};
-
-       uint32_t promoter_base_pri = 0, promoter_qos = THREAD_QOS_UNSPECIFIED;
-
-       spl_t s = splsched();
-       thread_lock(promoter);
+       boolean_t needs_update = FALSE;
+       int kern_promotion_schedpri = thread_get_inheritor_turnstile_sched_priority(thread);
 
        /*
-        * We capture the 'promotion qos' here, which is captured
-        * before task-level clamping.
-        *
-        * This means that if the process gets unclamped while a promotion,
-        * is in effect, the owning thread ends up with the correct QoS.
+        * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
+        * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
+        * and propagates the priority through the chain with the same cap, because as of now it does
+        * not differenciate on the kernel primitive.
         *
-        * This does NOT work correctly across processes, as the correct QoS
-        * in one is not necessarily the correct QoS in another.
-        * When we add support for multi-process ulock boosting, we need to
-        * do something more complex.
+        * If this assumption will change with the adoption of a kernel primitive that does not
+        * cap the when adding/propagating,
+        * then here is the place to put the generic cap for all kernel primitives
+        * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
         */
-       promoter_qos = promoter->effective_policy.thep_qos_promote;
+       assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
 
-       /* TODO: extract 'effective unclamped base pri' instead */
-       promoter_base_pri = promoter->base_pri;
+       if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
+               KDBG(MACHDBG_CODE(
+                           DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
+                   thread_tid(thread),
+                   kern_promotion_schedpri,
+                   thread->kern_promotion_schedpri);
 
-       thread_unlock(promoter);
-       splx(s);
+               needs_update = TRUE;
+               thread->kern_promotion_schedpri = kern_promotion_schedpri;
+               thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
+       }
+
+       return needs_update;
+}
+
+/*
+ * Check if the user promotion on thread has changed
+ * and apply it.
+ *
+ * thread locked on entry, might drop the thread lock
+ * and reacquire it.
+ */
+boolean_t
+thread_recompute_user_promotion_locked(thread_t thread)
+{
+       boolean_t needs_update = FALSE;
+       struct task_pend_token pend_token = {};
+       int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
+       int old_base_pri = thread->base_pri;
+       thread_qos_t qos_promotion;
+
+       /* Check if user promotion has changed */
+       if (thread->user_promotion_basepri == user_promotion_basepri) {
+               return needs_update;
+       } else {
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                   (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
+                   thread_tid(thread),
+                   user_promotion_basepri,
+                   thread->user_promotion_basepri,
+                   0, 0);
+               KDBG(MACHDBG_CODE(
+                           DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
+                   thread_tid(thread),
+                   user_promotion_basepri,
+                   thread->user_promotion_basepri);
+       }
+
+       /* Update the user promotion base pri */
+       thread->user_promotion_basepri = user_promotion_basepri;
+       pend_token.tpt_force_recompute_pri = 1;
+
+       if (user_promotion_basepri <= MAXPRI_THROTTLE) {
+               qos_promotion = THREAD_QOS_UNSPECIFIED;
+       } else {
+               qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
+       }
 
-       /* clamp out realtime to max user pri */
-       promoter_base_pri = MIN(promoter_base_pri, MAXPRI_USER);
+       proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+           TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
 
-       /* add in the saved promotion token */
-       assert(promote_token->pt_basepri <= MAXPRI_USER);
+       if (thread_get_waiting_turnstile(thread) &&
+           thread->base_pri != old_base_pri) {
+               needs_update = TRUE;
+       }
 
-       promoter_base_pri = MAX(promoter_base_pri, promote_token->pt_basepri);
-       promoter_qos = MAX(promoter_qos, promote_token->pt_qos);
+       thread_unlock(thread);
 
-       /* save the max for later */
-       promote_token->pt_basepri = promoter_base_pri;
-       promote_token->pt_qos = promoter_qos;
+       thread_policy_update_complete_unlocked(thread, &pend_token);
 
-       s = splsched();
        thread_lock(thread);
 
-       if (new_promotion) {
-               if (thread->user_promotions == 0) {
-                       assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
-                       assert(thread->user_promotion_basepri == 0);
-               }
+       return needs_update;
+}
 
-               thread->user_promotions++;
-       } else {
-               assert(thread->user_promotions > 0);
+/*
+ * Convert the thread user promotion base pri to qos for threads in qos world.
+ * For priority above UI qos, the qos would be set to UI.
+ */
+thread_qos_t
+thread_user_promotion_qos_for_pri(int priority)
+{
+       int qos;
+       for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
+               if (thread_qos_policy_params.qos_pri[qos] <= priority) {
+                       return qos;
+               }
        }
+       return THREAD_QOS_MAINTENANCE;
+}
+
+/*
+ * Set the thread's QoS Kevent override
+ * Owned by the Kevent subsystem
+ *
+ * May be called with spinlocks held, but not spinlocks
+ * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
+ *
+ * One 'add' must be balanced by one 'drop'.
+ * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
+ * Before the thread is deallocated, there must be 0 remaining overrides.
+ */
+static void
+thread_kevent_override(thread_t    thread,
+    uint32_t    qos_override,
+    boolean_t   is_new_override)
+{
+       struct task_pend_token pend_token = {};
+       boolean_t needs_update;
 
-       uint32_t thread_qos     = thread->requested_policy.thrp_qos_promote;
-       uint32_t thread_basepri = thread->user_promotion_basepri;
+       spl_t s = splsched();
+       thread_lock(thread);
 
-       uint32_t new_qos     = MAX(thread_qos, promoter_qos);
-       uint32_t new_basepri = MAX(thread_basepri, promoter_base_pri);
+       uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
 
-       /* TODO: Fast path the 'new is lower than effective' case to avoid full reevaluation */
-       if (thread_qos != new_qos || thread_basepri != new_basepri) {
+       assert(qos_override > THREAD_QOS_UNSPECIFIED);
+       assert(qos_override < THREAD_QOS_LAST);
 
-               thread->user_promotion_basepri = new_basepri;
+       if (is_new_override) {
+               if (thread->kevent_overrides++ == 0) {
+                       /* This add is the first override for this thread */
+                       assert(old_override == THREAD_QOS_UNSPECIFIED);
+               } else {
+                       /* There are already other overrides in effect for this thread */
+                       assert(old_override > THREAD_QOS_UNSPECIFIED);
+               }
+       } else {
+               /* There must be at least one override (the previous add call) in effect */
+               assert(thread->kevent_overrides > 0);
+               assert(old_override > THREAD_QOS_UNSPECIFIED);
+       }
 
-               pend_token.tpt_force_recompute_pri = 1;
+       /*
+        * We can't allow lowering if there are several IPC overrides because
+        * the caller can't possibly know the whole truth
+        */
+       if (thread->kevent_overrides == 1) {
+               needs_update = qos_override != old_override;
+       } else {
+               needs_update = qos_override > old_override;
+       }
 
+       if (needs_update) {
                proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
-                                                 TASK_POLICY_QOS_PROMOTE, new_qos,
-                                                 0, &pend_token);
+                   TASK_POLICY_QOS_KEVENT_OVERRIDE,
+                   qos_override, 0, &pend_token);
+               assert(pend_token.tpt_update_sockets == 0);
        }
 
        thread_unlock(thread);
@@ -2782,47 +2976,37 @@ thread_user_promotion_promote(thread_t  thread,
        thread_policy_update_complete_unlocked(thread, &pend_token);
 }
 
-/* Add a user promotion to thread */
 void
-thread_user_promotion_add(thread_t thread,
-                          thread_t promoter,
-                          struct promote_token* promote_token)
+thread_add_kevent_override(thread_t thread, uint32_t qos_override)
 {
-       thread_user_promotion_promote(thread, promoter, promote_token, TRUE);
+       thread_kevent_override(thread, qos_override, TRUE);
 }
 
-/* Update an existing user promotion on thread */
 void
-thread_user_promotion_update(thread_t thread,
-                             thread_t promoter,
-                             struct promote_token* promote_token)
+thread_update_kevent_override(thread_t thread, uint32_t qos_override)
 {
-       thread_user_promotion_promote(thread, promoter, promote_token, FALSE);
+       thread_kevent_override(thread, qos_override, FALSE);
 }
 
-/*
- * Drop a user promotion on thread
- * Mutexes may be held, but it's OK to take the throttle lock
- */
 void
-thread_user_promotion_drop(thread_t thread)
+thread_drop_kevent_override(thread_t thread)
 {
        struct task_pend_token pend_token = {};
 
        spl_t s = splsched();
        thread_lock(thread);
 
-       assert(thread->user_promotions > 0);
-
-       if (--thread->user_promotions == 0) {
-               thread->requested_policy.thrp_qos_promote = THREAD_QOS_UNSPECIFIED;
-               thread->user_promotion_basepri = 0;
+       assert(thread->kevent_overrides > 0);
 
-               pend_token.tpt_force_recompute_pri = 1;
+       if (--thread->kevent_overrides == 0) {
+               /*
+                * There are no more overrides for this thread, so we should
+                * clear out the saturated override value
+                */
 
                proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
-                                                 TASK_POLICY_QOS_PROMOTE, THREAD_QOS_UNSPECIFIED,
-                                                 0, &pend_token);
+                   TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
+                   0, &pend_token);
        }
 
        thread_unlock(thread);
@@ -2831,10 +3015,9 @@ thread_user_promotion_drop(thread_t thread)
        thread_policy_update_complete_unlocked(thread, &pend_token);
 }
 
-
 /*
- * Set the thread's QoS IPC override
- * Owned by the IPC subsystem
+ * Set the thread's QoS Workloop Servicer override
+ * Owned by the Kevent subsystem
  *
  * May be called with spinlocks held, but not spinlocks
  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
@@ -2844,98 +3027,104 @@ thread_user_promotion_drop(thread_t thread)
  * Before the thread is deallocated, there must be 0 remaining overrides.
  */
 static void
-thread_ipc_override(thread_t    thread,
-                    uint32_t    qos_override,
-                    boolean_t   is_new_override)
+thread_servicer_override(thread_t    thread,
+    uint32_t    qos_override,
+    boolean_t   is_new_override)
 {
        struct task_pend_token pend_token = {};
 
        spl_t s = splsched();
        thread_lock(thread);
 
-       uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;
-
        if (is_new_override) {
-               if (thread->ipc_overrides++ == 0) {
-                       /* This add is the first override for this thread */
-                       assert(old_override == THREAD_QOS_UNSPECIFIED);
-               } else {
-                       /* There are already other overrides in effect for this thread */
-                       assert(old_override > THREAD_QOS_UNSPECIFIED);
-               }
+               assert(!thread->requested_policy.thrp_qos_wlsvc_override);
        } else {
-               /* There must be at least one override (the previous add call) in effect */
-               assert(thread->ipc_overrides > 0);
-               assert(old_override > THREAD_QOS_UNSPECIFIED);
+               assert(thread->requested_policy.thrp_qos_wlsvc_override);
        }
 
-       uint32_t new_override = MAX(old_override, qos_override);
-
        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
-                                         TASK_POLICY_QOS_IPC_OVERRIDE,
-                                         new_override, 0, &pend_token);
-
-       assert(pend_token.tpt_update_sockets == 0);
+           TASK_POLICY_QOS_SERVICER_OVERRIDE,
+           qos_override, 0, &pend_token);
 
        thread_unlock(thread);
        splx(s);
 
-       /*
-        * this is only safe after rethrottle_thread supports
-        * being called from spinlock context
-        */
+       assert(pend_token.tpt_update_sockets == 0);
        thread_policy_update_complete_unlocked(thread, &pend_token);
 }
 
 void
-thread_add_ipc_override(thread_t    thread,
-                        uint32_t    qos_override)
+thread_add_servicer_override(thread_t thread, uint32_t qos_override)
 {
-       thread_ipc_override(thread, qos_override, TRUE);
+       assert(qos_override > THREAD_QOS_UNSPECIFIED);
+       assert(qos_override < THREAD_QOS_LAST);
+
+       thread_servicer_override(thread, qos_override, TRUE);
 }
 
 void
-thread_update_ipc_override(thread_t     thread,
-                           uint32_t     qos_override)
+thread_update_servicer_override(thread_t thread, uint32_t qos_override)
 {
-       thread_ipc_override(thread, qos_override, FALSE);
+       assert(qos_override > THREAD_QOS_UNSPECIFIED);
+       assert(qos_override < THREAD_QOS_LAST);
+
+       thread_servicer_override(thread, qos_override, FALSE);
 }
 
 void
-thread_drop_ipc_override(thread_t thread)
+thread_drop_servicer_override(thread_t thread)
 {
-       struct task_pend_token pend_token = {};
-
-       spl_t s = splsched();
-       thread_lock(thread);
+       thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
+}
 
-       assert(thread->ipc_overrides > 0);
 
-       if (--thread->ipc_overrides == 0) {
-               /*
-                * There are no more overrides for this thread, so we should
-                * clear out the saturated override value
-                */
+/* Get current requested qos / relpri, may be called from spinlock context */
+thread_qos_t
+thread_get_requested_qos(thread_t thread, int *relpri)
+{
+       int relprio_value = 0;
+       thread_qos_t qos;
 
-               proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
-                                                 TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
-                                                 0, &pend_token);
+       qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+           TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
+       if (relpri) {
+               *relpri = -relprio_value;
        }
+       return qos;
+}
+
+/*
+ * This function will promote the thread priority
+ * since exec could block other threads calling
+ * proc_find on the proc. This boost must be removed
+ * via call to thread_clear_exec_promotion.
+ *
+ * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
+ */
+void
+thread_set_exec_promotion(thread_t thread)
+{
+       spl_t s = splsched();
+       thread_lock(thread);
+
+       sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
 
        thread_unlock(thread);
        splx(s);
-
-       /*
-        * this is only safe after rethrottle_thread supports
-        * being called from spinlock context
-        */
-       thread_policy_update_complete_unlocked(thread, &pend_token);
 }
 
-/* Get current IPC override, may be called from spinlock context */
-uint32_t
-thread_get_ipc_override(thread_t thread)
+/*
+ * This function will clear the exec thread
+ * promotion set on the thread by thread_set_exec_promotion.
+ */
+void
+thread_clear_exec_promotion(thread_t thread)
 {
-       return proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_IPC_OVERRIDE, NULL);
-}
+       spl_t s = splsched();
+       thread_lock(thread);
+
+       sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
 
+       thread_unlock(thread);
+       splx(s);
+}