X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3e170ce000f1506b7b5d2c5c7faec85ceabb573d..HEAD:/osfmk/kern/thread_policy.c

diff --git a/osfmk/kern/thread_policy.c b/osfmk/kern/thread_policy.c
index 9a82a198f..e82a67b72 100644
--- a/osfmk/kern/thread_policy.c
+++ b/osfmk/kern/thread_policy.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
@@ -35,11 +35,31 @@
 #include <kern/affinity.h>
 #include <mach/task_policy.h>
 #include <kern/sfi.h>
+#include <kern/policy_internal.h>
+#include <sys/errno.h>
+#include <sys/ulock.h>
 
 #include <mach/machine/sdt.h>
 
+#ifdef MACH_BSD
+extern int      proc_selfpid(void);
+extern char *   proc_name_address(void *p);
+extern void     rethrottle_thread(void * uthread);
+#endif /* MACH_BSD */
+
 #define QOS_EXTRACT(q)        ((q) & 0xff)
 
+uint32_t qos_override_mode;
+#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
+#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
+#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
+#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
+
+extern zone_t thread_qos_override_zone;
+
+static void
+proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
+
 /*
  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
  * to threads that don't have a QoS class set.
@@ -98,28 +118,80 @@ thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
 static int
 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
 
+static void
+proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
 
-extern void proc_get_thread_policy(thread_t thread, thread_policy_state_t info);
+static void
+proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 
-boolean_t
-thread_has_qos_policy(thread_t thread) {
-	return (proc_get_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
+static void
+proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
+
+static void
+thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
+
+static int
+thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
+
+static int
+proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
+
+static void
+thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
+
+static void
+thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
+
+void
+thread_policy_init(void)
+{
+	if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
+		printf("QOS override mode: 0x%08x\n", qos_override_mode);
+	} else {
+		qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
+	}
 }
 
-kern_return_t
-thread_remove_qos_policy(thread_t thread) 
+boolean_t
+thread_has_qos_policy(thread_t thread)
 {
-	thread_qos_policy_data_t unspec_qos;
-	unspec_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
-	unspec_qos.tier_importance = 0;
+	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
+}
+
 
+static void
+thread_remove_qos_policy_locked(thread_t thread,
+    task_pend_token_t pend_token)
+{
 	__unused int prev_qos = thread->requested_policy.thrp_qos;
 
 	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
 
-	return thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&unspec_qos, THREAD_QOS_POLICY_COUNT);
+	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
+	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
+}
+
+kern_return_t
+thread_remove_qos_policy(thread_t thread)
+{
+	struct task_pend_token pend_token = {};
+
+	thread_mtx_lock(thread);
+	if (!thread->active) {
+		thread_mtx_unlock(thread);
+		return KERN_TERMINATED;
+	}
+
+	thread_remove_qos_policy_locked(thread, &pend_token);
+
+	thread_mtx_unlock(thread);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+
+	return KERN_SUCCESS;
 }
 
+
 boolean_t
 thread_is_static_param(thread_t thread)
 {
@@ -141,28 +213,30 @@ thread_qos_scaled_relative_priority(int qos, int qos_relprio)
 	int next_lower_qos;
 
 	/* Fast path, since no validation or scaling is needed */
-	if (qos_relprio == 0) return 0;
+	if (qos_relprio == 0) {
+		return 0;
+	}
 
 	switch (qos) {
-		case THREAD_QOS_USER_INTERACTIVE:
-			next_lower_qos = THREAD_QOS_USER_INITIATED;
-			break;
-		case THREAD_QOS_USER_INITIATED:
-			next_lower_qos = THREAD_QOS_LEGACY;
-			break;
-		case THREAD_QOS_LEGACY:
-			next_lower_qos = THREAD_QOS_UTILITY;
-			break;
-		case THREAD_QOS_UTILITY:
-			next_lower_qos = THREAD_QOS_BACKGROUND;
-			break;
-		case THREAD_QOS_MAINTENANCE:
-		case THREAD_QOS_BACKGROUND:
-			next_lower_qos = 0;
-			break;
-		default:
-			panic("Unrecognized QoS %d", qos);
-			return 0;
+	case THREAD_QOS_USER_INTERACTIVE:
+		next_lower_qos = THREAD_QOS_USER_INITIATED;
+		break;
+	case THREAD_QOS_USER_INITIATED:
+		next_lower_qos = THREAD_QOS_LEGACY;
+		break;
+	case THREAD_QOS_LEGACY:
+		next_lower_qos = THREAD_QOS_UTILITY;
+		break;
+	case THREAD_QOS_UTILITY:
+		next_lower_qos = THREAD_QOS_BACKGROUND;
+		break;
+	case THREAD_QOS_MAINTENANCE:
+	case THREAD_QOS_BACKGROUND:
+		next_lower_qos = 0;
+		break;
+	default:
+		panic("Unrecognized QoS %d", qos);
+		return 0;
 	}
 
 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
@@ -184,29 +258,32 @@ thread_qos_scaled_relative_priority(int qos, int qos_relprio)
  * flag set by -qos-policy-allow boot-arg to allow
  * testing thread qos policy from userspace
  */
-boolean_t allow_qos_policy_set = FALSE;
+static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
 
 kern_return_t
 thread_policy_set(
-	thread_t				thread,
-	thread_policy_flavor_t	flavor,
-	thread_policy_t			policy_info,
-	mach_msg_type_number_t	count)
+	thread_t                                thread,
+	thread_policy_flavor_t  flavor,
+	thread_policy_t                 policy_info,
+	mach_msg_type_number_t  count)
 {
 	thread_qos_policy_data_t req_qos;
 	kern_return_t kr;
-	
+
 	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
 
-	if (thread == THREAD_NULL)
-		return (KERN_INVALID_ARGUMENT);
+	if (thread == THREAD_NULL) {
+		return KERN_INVALID_ARGUMENT;
+	}
 
-	if (allow_qos_policy_set == FALSE) {
-		if (thread_is_static_param(thread))
-			return (KERN_POLICY_STATIC);
+	if (!allow_qos_policy_set) {
+		if (thread_is_static_param(thread)) {
+			return KERN_POLICY_STATIC;
+		}
 
-		if (flavor == THREAD_QOS_POLICY || flavor == THREAD_QOS_POLICY_OVERRIDE)
-			return (KERN_INVALID_ARGUMENT);
+		if (flavor == THREAD_QOS_POLICY) {
+			return KERN_INVALID_ARGUMENT;
+		}
 	}
 
 	/* Threads without static_param set reset their QoS when other policies are applied. */
@@ -236,29 +313,28 @@ thread_policy_set(
 
 kern_return_t
 thread_policy_set_internal(
-	thread_t				thread,
-	thread_policy_flavor_t	flavor,
-	thread_policy_t			policy_info,
-	mach_msg_type_number_t	count)
+	thread_t                     thread,
+	thread_policy_flavor_t       flavor,
+	thread_policy_t              policy_info,
+	mach_msg_type_number_t       count)
 {
-	kern_return_t			result = KERN_SUCCESS;
-	spl_t					s;
+	kern_return_t result = KERN_SUCCESS;
+	struct task_pend_token pend_token = {};
 
 	thread_mtx_lock(thread);
 	if (!thread->active) {
 		thread_mtx_unlock(thread);
 
-		return (KERN_TERMINATED);
+		return KERN_TERMINATED;
 	}
 
 	switch (flavor) {
-
 	case THREAD_EXTENDED_POLICY:
 	{
-		boolean_t				timeshare = TRUE;
+		boolean_t timeshare = TRUE;
 
 		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
-			thread_extended_policy_t	info;
+			thread_extended_policy_t info;
 
 			info = (thread_extended_policy_t)policy_info;
 			timeshare = info->timeshare;
@@ -266,7 +342,7 @@ thread_policy_set_internal(
 
 		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
 
-		s = splsched();
+		spl_t s = splsched();
 		thread_lock(thread);
 
 		thread_set_user_sched_mode_and_recompute_pri(thread, mode);
@@ -274,14 +350,14 @@ thread_policy_set_internal(
 		thread_unlock(thread);
 		splx(s);
 
-		sfi_reevaluate(thread);
+		pend_token.tpt_update_thread_sfi = 1;
 
 		break;
 	}
 
 	case THREAD_TIME_CONSTRAINT_POLICY:
 	{
-		thread_time_constraint_policy_t		info;
+		thread_time_constraint_policy_t info;
 
 		if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
@@ -289,34 +365,36 @@ thread_policy_set_internal(
 		}
 
 		info = (thread_time_constraint_policy_t)policy_info;
-		if (	info->constraint < info->computation	||
-				info->computation > max_rt_quantum		||
-				info->computation < min_rt_quantum		) {
+
+
+		if (info->constraint < info->computation ||
+		    info->computation > max_rt_quantum ||
+		    info->computation < min_rt_quantum) {
 			result = KERN_INVALID_ARGUMENT;
 			break;
 		}
 
-		s = splsched();
+		spl_t s = splsched();
 		thread_lock(thread);
 
-		thread->realtime.period = info->period;
-		thread->realtime.computation = info->computation;
-		thread->realtime.constraint = info->constraint;
-		thread->realtime.preemptible = info->preemptible;
+		thread->realtime.period         = info->period;
+		thread->realtime.computation    = info->computation;
+		thread->realtime.constraint     = info->constraint;
+		thread->realtime.preemptible    = info->preemptible;
 
 		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
 
 		thread_unlock(thread);
 		splx(s);
 
-		sfi_reevaluate(thread);
+		pend_token.tpt_update_thread_sfi = 1;
 
 		break;
 	}
 
 	case THREAD_PRECEDENCE_POLICY:
 	{
-		thread_precedence_policy_t		info;
+		thread_precedence_policy_t info;
 
 		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
@@ -324,7 +402,7 @@ thread_policy_set_internal(
 		}
 		info = (thread_precedence_policy_t)policy_info;
 
-		s = splsched();
+		spl_t s = splsched();
 		thread_lock(thread);
 
 		thread->importance = info->importance;
@@ -339,7 +417,7 @@ thread_policy_set_internal(
 
 	case THREAD_AFFINITY_POLICY:
 	{
-		thread_affinity_policy_t	info;
+		thread_affinity_policy_t info;
 
 		if (!thread_affinity_is_supported()) {
 			result = KERN_NOT_SUPPORTED;
@@ -361,53 +439,84 @@ thread_policy_set_internal(
 		return thread_affinity_set(thread, info->affinity_tag);
 	}
 
+#if !defined(XNU_TARGET_OS_OSX)
+	case THREAD_BACKGROUND_POLICY:
+	{
+		thread_background_policy_t info;
+
+		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
+			result = KERN_INVALID_ARGUMENT;
+			break;
+		}
+
+		if (thread->task != current_task()) {
+			result = KERN_PROTECTION_FAILURE;
+			break;
+		}
+
+		info = (thread_background_policy_t) policy_info;
+
+		int enable;
+
+		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
+			enable = TASK_POLICY_ENABLE;
+		} else {
+			enable = TASK_POLICY_DISABLE;
+		}
+
+		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
+
+		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
+
+		break;
+	}
+#endif /* !defined(XNU_TARGET_OS_OSX) */
+
 	case THREAD_THROUGHPUT_QOS_POLICY:
 	{
 		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
-		int tqos;
-		
-		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
+		thread_throughput_qos_t tqos;
+
+		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
 			break;
 		}
 
-		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) !=
-		    KERN_SUCCESS) {
+		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
 			break;
 		}
 
 		tqos = qos_extract(info->thread_throughput_qos_tier);
-		thread->effective_policy.t_through_qos = tqos;
-	}
+
+		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
+
 		break;
+	}
 
 	case THREAD_LATENCY_QOS_POLICY:
 	{
 		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
-		int lqos;
-		
-		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
+		thread_latency_qos_t lqos;
+
+		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
 			break;
 		}
 
-		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) !=
-		    KERN_SUCCESS) {
+		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
 			break;
 		}
 
 		lqos = qos_extract(info->thread_latency_qos_tier);
-/* The expected use cases (opt-in) of per-thread latency QoS would seem to
- * preclude any requirement at present to re-evaluate timers on a thread level
- * latency QoS change.
- */
-		thread->effective_policy.t_latency_qos = lqos;
 
-	}
+		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
+
 		break;
+	}
 
 	case THREAD_QOS_POLICY:
-	case THREAD_QOS_POLICY_OVERRIDE:
 	{
 		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
 
@@ -431,41 +540,9 @@ thread_policy_set_internal(
 			break;
 		}
 
-		/*
-		 * Going into task policy requires the task mutex,
-		 * because of the way synchronization against the IO policy
-		 * subsystem works.
-		 *
-		 * We need to move thread policy to the thread mutex instead.
-		 * <rdar://problem/15831652> separate thread policy from task policy
-		 */
-
-		if (flavor == THREAD_QOS_POLICY_OVERRIDE) {
-			int strongest_override = info->qos_tier;
-
-			if (info->qos_tier != THREAD_QOS_UNSPECIFIED &&
-			    thread->requested_policy.thrp_qos_override != THREAD_QOS_UNSPECIFIED)
-				strongest_override = MAX(thread->requested_policy.thrp_qos_override, info->qos_tier);
-
-			thread_mtx_unlock(thread);
-
-			/* There is a race here. To be closed in <rdar://problem/15831652> separate thread policy from task policy */
-
-			proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, strongest_override);
-
-			return (result);
-		}
-
-		thread_mtx_unlock(thread);
-
-		proc_set_task_policy2(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, info->qos_tier, -info->tier_importance);
+		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
+		    info->qos_tier, -info->tier_importance, &pend_token);
 
-		thread_mtx_lock(thread);
-		if (!thread->active) {
-			thread_mtx_unlock(thread);
-			return (KERN_TERMINATED);
-		}
-		
 		break;
 	}
 
@@ -475,287 +552,431 @@ thread_policy_set_internal(
 	}
 
 	thread_mtx_unlock(thread);
-	return (result);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+
+	return result;
 }
 
 /*
- * thread_set_mode_and_absolute_pri:
- *
- * Set scheduling policy & absolute priority for thread, for deprecated
- * thread_set_policy and thread_policy interfaces.
- *
  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
  * Both result in FIXED mode scheduling.
- *
- * Called with thread mutex locked.
  */
-kern_return_t
-thread_set_mode_and_absolute_pri(
-	thread_t		thread,
-	integer_t		policy,
-	integer_t		priority)
+static sched_mode_t
+convert_policy_to_sched_mode(integer_t policy)
+{
+	switch (policy) {
+	case POLICY_TIMESHARE:
+		return TH_MODE_TIMESHARE;
+	case POLICY_RR:
+	case POLICY_FIFO:
+		return TH_MODE_FIXED;
+	default:
+		panic("unexpected sched policy: %d", policy);
+		return TH_MODE_NONE;
+	}
+}
+
+/*
+ * Called either with the thread mutex locked
+ * or from the pthread kext in a 'safe place'.
+ */
+static kern_return_t
+thread_set_mode_and_absolute_pri_internal(thread_t              thread,
+    sched_mode_t          mode,
+    integer_t             priority,
+    task_pend_token_t     pend_token)
 {
-	spl_t s;
-	sched_mode_t mode;
 	kern_return_t kr = KERN_SUCCESS;
 
-	if (thread_is_static_param(thread))
-		return (KERN_POLICY_STATIC);
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	/* This path isn't allowed to change a thread out of realtime. */
+	if ((thread->sched_mode == TH_MODE_REALTIME) ||
+	    (thread->saved_mode == TH_MODE_REALTIME)) {
+		kr = KERN_FAILURE;
+		goto unlock;
+	}
 
-	if (thread->policy_reset)
-		return (KERN_SUCCESS);
+	if (thread->policy_reset) {
+		kr = KERN_SUCCESS;
+		goto unlock;
+	}
 
-	/* Setting legacy policies on threads kills the current QoS */
-	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
-		thread_mtx_unlock(thread);
+	sched_mode_t old_mode = thread->sched_mode;
 
-		kr = thread_remove_qos_policy(thread);
+	/*
+	 * Reverse engineer and apply the correct importance value
+	 * from the requested absolute priority value.
+	 *
+	 * TODO: Store the absolute priority value instead
+	 */
 
-		thread_mtx_lock(thread);
-		if (!thread->active) {
-			return (KERN_TERMINATED);
-		}
+	if (priority >= thread->max_priority) {
+		priority = thread->max_priority - thread->task_priority;
+	} else if (priority >= MINPRI_KERNEL) {
+		priority -=  MINPRI_KERNEL;
+	} else if (priority >= MINPRI_RESERVED) {
+		priority -=  MINPRI_RESERVED;
+	} else {
+		priority -= BASEPRI_DEFAULT;
 	}
 
-	switch (policy) {
-		case POLICY_TIMESHARE:
-			mode = TH_MODE_TIMESHARE;
-			break;
-		case POLICY_RR:
-		case POLICY_FIFO:
-			mode = TH_MODE_FIXED;
-			break;
-		default:
-			panic("unexpected sched policy: %d", policy);
-			break;
+	priority += thread->task_priority;
+
+	if (priority > thread->max_priority) {
+		priority = thread->max_priority;
+	} else if (priority < MINPRI) {
+		priority = MINPRI;
 	}
 
-	s = splsched();
+	thread->importance = priority - thread->task_priority;
+
+	thread_set_user_sched_mode_and_recompute_pri(thread, mode);
+
+	if (mode != old_mode) {
+		pend_token->tpt_update_thread_sfi = 1;
+	}
+
+unlock:
+	thread_unlock(thread);
+	splx(s);
+
+	return kr;
+}
+
+void
+thread_freeze_base_pri(thread_t thread)
+{
+	assert(thread == current_thread());
+
+	spl_t s = splsched();
 	thread_lock(thread);
 
-	/* This path isn't allowed to change a thread out of realtime. */
-	if ((thread->sched_mode != TH_MODE_REALTIME) &&
-	    (thread->saved_mode != TH_MODE_REALTIME)) {
+	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
+	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
+
+	thread_unlock(thread);
+	splx(s);
+}
+
+bool
+thread_unfreeze_base_pri(thread_t thread)
+{
+	assert(thread == current_thread());
+	integer_t base_pri;
+	ast_t ast = 0;
 
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
+	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
+
+	base_pri = thread->req_base_pri;
+	if (base_pri != thread->base_pri) {
 		/*
-		 * Reverse engineer and apply the correct importance value
-		 * from the requested absolute priority value.
+		 * This function returns "true" if the base pri change
+		 * is the most likely cause for the preemption.
 		 */
+		sched_set_thread_base_priority(thread, base_pri);
+		ast = ast_peek(AST_PREEMPT);
+	}
 
-		if (priority >= thread->max_priority)
-			priority = thread->max_priority - thread->task_priority;
-		else if (priority >= MINPRI_KERNEL)
-			priority -=  MINPRI_KERNEL;
-		else if (priority >= MINPRI_RESERVED)
-			priority -=  MINPRI_RESERVED;
-		else
-			priority -= BASEPRI_DEFAULT;
-
-		priority += thread->task_priority;
+	thread_unlock(thread);
+	splx(s);
 
-		if (priority > thread->max_priority)
-			priority = thread->max_priority;
-		else if (priority < MINPRI)
-			priority = MINPRI;
+	return ast != 0;
+}
 
-		thread->importance = priority - thread->task_priority;
+uint8_t
+thread_workq_pri_for_qos(thread_qos_t qos)
+{
+	assert(qos < THREAD_QOS_LAST);
+	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
+}
 
-		thread_set_user_sched_mode_and_recompute_pri(thread, mode);
+thread_qos_t
+thread_workq_qos_for_pri(int priority)
+{
+	thread_qos_t qos;
+	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
+		// indicate that workq should map >UI threads to workq's
+		// internal notation for above-UI work.
+		return THREAD_QOS_UNSPECIFIED;
+	}
+	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
+		// map a given priority up to the next nearest qos band.
+		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
+			return qos;
+		}
 	}
+	return THREAD_QOS_MAINTENANCE;
+}
+
+/*
+ * private interface for pthread workqueues
+ *
+ * Set scheduling policy & absolute priority for thread
+ * May be called with spinlocks held
+ * Thread mutex lock is not held
+ */
+void
+thread_reset_workq_qos(thread_t thread, uint32_t qos)
+{
+	struct task_pend_token pend_token = {};
+
+	assert(qos < THREAD_QOS_LAST);
+
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
+	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
+	    &pend_token);
+
+	assert(pend_token.tpt_update_sockets == 0);
 
 	thread_unlock(thread);
 	splx(s);
 
-	sfi_reevaluate(thread);
-
-	return (kr);
+	thread_policy_update_complete_unlocked(thread, &pend_token);
 }
 
 /*
- * Set the thread's requested mode and recompute priority
- * Called with thread mutex and thread locked
+ * private interface for pthread workqueues
  *
- * TODO: Mitigate potential problems caused by moving thread to end of runq
- * whenever its priority is recomputed
- *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
+ * Set scheduling policy & absolute priority for thread
+ * May be called with spinlocks held
+ * Thread mutex lock is held
  */
-static void
-thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
+void
+thread_set_workq_override(thread_t thread, uint32_t qos)
 {
-	if (thread->policy_reset)
-		return;
+	struct task_pend_token pend_token = {};
 
-	boolean_t removed = thread_run_queue_remove(thread);
+	assert(qos < THREAD_QOS_LAST);
 
-	/*
-	 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
-	 * That way there's zero confusion over which the user wants
-	 * and which the kernel wants.
-	 */
-	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
-		thread->saved_mode = mode;
-	else
-		sched_set_thread_mode(thread, mode);
+	spl_t s = splsched();
+	thread_lock(thread);
 
-	thread_recompute_priority(thread);
+	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
 
-	if (removed)
-		thread_run_queue_reinsert(thread, SCHED_TAILQ);
+	assert(pend_token.tpt_update_sockets == 0);
+
+	thread_unlock(thread);
+	splx(s);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
 }
 
-/* called with task lock locked */
+/*
+ * private interface for pthread workqueues
+ *
+ * Set scheduling policy & absolute priority for thread
+ * May be called with spinlocks held
+ * Thread mutex lock is not held
+ */
 void
-thread_recompute_qos(thread_t thread) {
-	spl_t s;
+thread_set_workq_pri(thread_t  thread,
+    thread_qos_t qos,
+    integer_t priority,
+    integer_t policy)
+{
+	struct task_pend_token pend_token = {};
+	sched_mode_t mode = convert_policy_to_sched_mode(policy);
 
-	thread_mtx_lock(thread);
+	assert(qos < THREAD_QOS_LAST);
+	assert(thread->static_param);
 
-	if (!thread->active) {
-		thread_mtx_unlock(thread);
+	if (!thread->static_param || !thread->active) {
 		return;
 	}
 
-	s = splsched();
+	spl_t s = splsched();
 	thread_lock(thread);
 
-	thread_recompute_priority(thread);
+	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
+	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
+	    0, &pend_token);
 
 	thread_unlock(thread);
 	splx(s);
 
-	thread_mtx_unlock(thread);
-}
+	/* Concern: this doesn't hold the mutex... */
 
-/* called with task lock locked and thread_mtx_lock locked */
-void
-thread_update_qos_cpu_time(thread_t thread, boolean_t lock_needed)
-{
-	uint64_t last_qos_change_balance;
-	ledger_amount_t thread_balance_credit;
-	ledger_amount_t thread_balance_debit;
-	ledger_amount_t effective_qos_time;
-	uint64_t ctime;
-	uint64_t remainder = 0, consumed = 0;
-	processor_t		processor;
-	spl_t s;
-	kern_return_t kr;
+	__assert_only kern_return_t kr;
+	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
+	    &pend_token);
+	assert(kr == KERN_SUCCESS);
 
-	if (lock_needed) {
-		s = splsched();
-		thread_lock(thread);
+	if (pend_token.tpt_update_thread_sfi) {
+		sfi_reevaluate(thread);
 	}
-	
-	/*
-	 * Calculation of time elapsed by the thread in the current qos.
-	 * Following is the timeline which shows all the variables used in the calculation below.
-	 *
-	 *       thread ledger      thread ledger
-	 *      cpu_time_last_qos     cpu_time
-	 *              |                |<-   consumed  ->|<- remainder  ->|
-	 * timeline  ----------------------------------------------------------->
-	 *                               |                 |                |
-	 *                            thread_dispatch    ctime           quantum end
-	 *
-	 *              |<-----  effective qos time  ----->|
-	 */
-	
-	/* 
-	 * Calculate time elapsed since last qos change on this thread.
-	 * For cpu time on thread ledger, do not use ledger_get_balance,
-	 * only use credit field of ledger, since
-	 * debit is used by per thread cpu limits and is not zero.
-	 */
-	kr = ledger_get_entries(thread->t_threadledger, thread_ledgers.cpu_time, &thread_balance_credit, &thread_balance_debit);
-	if (kr != KERN_SUCCESS)
-		goto out;
-	last_qos_change_balance = thread->cpu_time_last_qos;
+}
+
+/*
+ * thread_set_mode_and_absolute_pri:
+ *
+ * Set scheduling policy & absolute priority for thread, for deprecated
+ * thread_set_policy and thread_policy interfaces.
+ *
+ * Called with nothing locked.
+ */
+kern_return_t
+thread_set_mode_and_absolute_pri(thread_t   thread,
+    integer_t  policy,
+    integer_t  priority)
+{
+	kern_return_t kr = KERN_SUCCESS;
+	struct task_pend_token pend_token = {};
+
+	sched_mode_t mode = convert_policy_to_sched_mode(policy);
+
+	thread_mtx_lock(thread);
+
+	if (!thread->active) {
+		kr = KERN_TERMINATED;
+		goto unlock;
+	}
+
+	if (thread_is_static_param(thread)) {
+		kr = KERN_POLICY_STATIC;
+		goto unlock;
+	}
+
+	/* Setting legacy policies on threads kills the current QoS */
+	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
+		thread_remove_qos_policy_locked(thread, &pend_token);
+	}
+
+	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
+
+unlock:
+	thread_mtx_unlock(thread);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+
+	return kr;
+}
+
+/*
+ * Set the thread's requested mode and recompute priority
+ * Called with thread mutex and thread locked
+ *
+ * TODO: Mitigate potential problems caused by moving thread to end of runq
+ * whenever its priority is recomputed
+ *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
+ */
+static void
+thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
+{
+	if (thread->policy_reset) {
+		return;
+	}
+
+	boolean_t removed = thread_run_queue_remove(thread);
 
 	/*
-	 * If thread running on CPU, calculate time elapsed since this thread was last dispatched on cpu.
-	 * The thread ledger is only updated at context switch, the time since last context swicth is not 
-	 * updated in the thread ledger cpu time.
+	 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
+	 * That way there's zero confusion over which the user wants
+	 * and which the kernel wants.
 	 */
-	processor = thread->last_processor;
-	if ((processor != PROCESSOR_NULL) && (processor->state == PROCESSOR_RUNNING) &&
-		   (processor->active_thread == thread)) {
-		ctime = mach_absolute_time();
-	
-		if (processor->quantum_end > ctime)
-			remainder = processor->quantum_end - ctime;
+	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
+		thread->saved_mode = mode;
+	} else {
+		sched_set_thread_mode(thread, mode);
+	}
+
+	thread_recompute_priority(thread);
 
-		consumed = thread->quantum_remaining - remainder;
+	if (removed) {
+		thread_run_queue_reinsert(thread, SCHED_TAILQ);
 	}
+}
+
+/* called at splsched with thread lock locked */
+static void
+thread_update_qos_cpu_time_locked(thread_t thread)
+{
+	task_t task = thread->task;
+	uint64_t timer_sum, timer_delta;
+
 	/*
-	 * There can be multiple qos change in a quantum and in that case the cpu_time_last_qos will
-	 * lie between cpu_time marker and ctime marker shown below. The output of 
-	 * thread_balance - last_qos_change_balance will be negative in such case, but overall outcome
-	 * when consumed is added to it would be positive.
+	 * This is only as accurate as the distance between
+	 * last context switch (embedded) or last user/kernel boundary transition (desktop)
+	 * because user_timer and system_timer are only updated then.
 	 *
-	 *          thread ledger
-	 *            cpu_time
-	 *               |<------------  consumed    --------->|<- remainder  ->|
-	 * timeline  ----------------------------------------------------------->
-	 *               |              |                      |                |
-	 *         thread_dispatch  thread ledger            ctime           quantum end
-	 *                          cpu_time_last_qos
+	 * TODO: Consider running a timer_update operation here to update it first.
+	 *       Maybe doable with interrupts disabled from current thread.
+	 *       If the thread is on a different core, may not be easy to get right.
 	 *
-	 *                              |<-effective qos time->|
+	 * TODO: There should be a function for this in timer.c
 	 */
-	effective_qos_time = (ledger_amount_t) consumed;
-	effective_qos_time += thread_balance_credit - last_qos_change_balance;
 
-	if (lock_needed) {
-		thread_unlock(thread);
-		splx(s);
-	}
+	timer_sum = timer_grab(&thread->user_timer);
+	timer_sum += timer_grab(&thread->system_timer);
+	timer_delta = timer_sum - thread->vtimer_qos_save;
 
-	if (effective_qos_time < 0)
-		return;
+	thread->vtimer_qos_save = timer_sum;
 
-	thread->cpu_time_last_qos += (uint64_t)effective_qos_time;
+	uint64_t* task_counter = NULL;
 
-	/*
-	 * Update the task-level qos stats. Its safe to perform operations on these fields, since we 
-	 * hold the task lock.
-	 */
+	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
 	switch (thread->effective_policy.thep_qos) {
-	
-	case THREAD_QOS_DEFAULT:
-		thread->task->cpu_time_qos_stats.cpu_time_qos_default += effective_qos_time;
-		break;
+	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
+	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
+	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
+	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
+	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
+	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
+	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
+	default:
+		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
+	}
 
-	case THREAD_QOS_MAINTENANCE:
-		thread->task->cpu_time_qos_stats.cpu_time_qos_maintenance += effective_qos_time;
-		break;
+	OSAddAtomic64(timer_delta, task_counter);
+
+	/* Update the task-level qos stats atomically, because we don't have the task lock. */
+	switch (thread->requested_policy.thrp_qos) {
+	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
+	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
+	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
+	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
+	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
+	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
+	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
+	default:
+		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
+	}
 
-	case THREAD_QOS_BACKGROUND:
-		thread->task->cpu_time_qos_stats.cpu_time_qos_background += effective_qos_time;
-		break;
+	OSAddAtomic64(timer_delta, task_counter);
+}
 
-	case THREAD_QOS_UTILITY:
-		thread->task->cpu_time_qos_stats.cpu_time_qos_utility += effective_qos_time;
-		break;
+/*
+ * called with no thread locks held
+ * may hold task lock
+ */
+void
+thread_update_qos_cpu_time(thread_t thread)
+{
+	thread_mtx_lock(thread);
 
-	case THREAD_QOS_LEGACY:
-		thread->task->cpu_time_qos_stats.cpu_time_qos_legacy += effective_qos_time;
-		break;
-	
-	case THREAD_QOS_USER_INITIATED:
-		thread->task->cpu_time_qos_stats.cpu_time_qos_user_initiated += effective_qos_time;
-		break;
+	spl_t s = splsched();
+	thread_lock(thread);
 
-	case THREAD_QOS_USER_INTERACTIVE:
-		thread->task->cpu_time_qos_stats.cpu_time_qos_user_interactive += effective_qos_time;
-		break;
-	}
+	thread_update_qos_cpu_time_locked(thread);
 
-	return;
+	thread_unlock(thread);
+	splx(s);
 
-out:
-	if (lock_needed) {
-		thread_unlock(thread);
-		splx(s);
-	}
+	thread_mtx_unlock(thread);
 }
 
 /*
@@ -763,21 +984,25 @@ out:
  *
  * Called with thread_lock and thread mutex held.
  */
+extern thread_t vm_pageout_scan_thread;
+extern boolean_t vps_dynamic_priority_enabled;
+
 void
 thread_recompute_priority(
-	thread_t		thread)
+	thread_t                thread)
 {
-	integer_t		priority;
+	integer_t               priority;
 
-	if (thread->policy_reset)
+	if (thread->policy_reset) {
 		return;
+	}
 
 	if (thread->sched_mode == TH_MODE_REALTIME) {
 		sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
 		return;
 	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
 		int qos = thread->effective_policy.thep_qos;
-		int qos_ui_is_urgent = thread->effective_policy.qos_ui_is_urgent;
+		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
 		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
 		int qos_scaled_relprio;
 
@@ -792,68 +1017,119 @@ thread_recompute_priority(
 			qos_scaled_relprio += 1;
 		}
 
+		/* TODO: factor in renice priority here? */
+
 		priority += qos_scaled_relprio;
 	} else {
-		if (thread->importance > MAXPRI)
+		if (thread->importance > MAXPRI) {
 			priority = MAXPRI;
-		else if (thread->importance < -MAXPRI)
+		} else if (thread->importance < -MAXPRI) {
 			priority = -MAXPRI;
-		else
+		} else {
 			priority = thread->importance;
+		}
 
 		priority += thread->task_priority;
 	}
 
+	priority = MAX(priority, thread->user_promotion_basepri);
+
+	/*
+	 * Clamp priority back into the allowed range for this task.
+	 *  The initial priority value could be out of this range due to:
+	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
+	 *      Task is user task (max-pri is 63)
+	 *      Task is kernel task (max-pri is 95)
+	 * Note that thread->importance is user-settable to any integer
+	 * via THREAD_PRECEDENCE_POLICY.
+	 */
+	if (priority > thread->max_priority) {
+		if (thread->effective_policy.thep_promote_above_task) {
+			priority = MAX(thread->max_priority, thread->user_promotion_basepri);
+		} else {
+			priority = thread->max_priority;
+		}
+	} else if (priority < MINPRI) {
+		priority = MINPRI;
+	}
+
 	if (thread->saved_mode == TH_MODE_REALTIME &&
-	    thread->sched_flags & TH_SFLAG_FAILSAFE)
+	    thread->sched_flags & TH_SFLAG_FAILSAFE) {
 		priority = DEPRESSPRI;
-
-	if (thread->effective_policy.terminated == TRUE && priority < thread->task_priority) {
-		priority = thread->task_priority;
 	}
 
-	if (priority > thread->max_priority)
-		priority = thread->max_priority;
-	else if (priority < MINPRI)
-		priority = MINPRI;
+	if (thread->effective_policy.thep_terminated == TRUE) {
+		/*
+		 * We temporarily want to override the expected priority to
+		 * ensure that the thread exits in a timely manner.
+		 * Note that this is allowed to exceed thread->max_priority
+		 * so that the thread is no longer clamped to background
+		 * during the final exit phase.
+		 */
+		if (priority < thread->task_priority) {
+			priority = thread->task_priority;
+		}
+		if (priority < BASEPRI_DEFAULT) {
+			priority = BASEPRI_DEFAULT;
+		}
+	}
 
+#if !defined(XNU_TARGET_OS_OSX)
+	/* No one can have a base priority less than MAXPRI_THROTTLE */
+	if (priority < MAXPRI_THROTTLE) {
+		priority = MAXPRI_THROTTLE;
+	}
+#endif /* !defined(XNU_TARGET_OS_OSX) */
 
 	sched_set_thread_base_priority(thread, priority);
 }
 
-/* Called with the thread mutex held */
+/* Called with the task lock held, but not the thread mutex or spinlock */
 void
-thread_task_priority(
-	thread_t		thread,
-	integer_t		priority,
-	integer_t		max_priority)
+thread_policy_update_tasklocked(
+	thread_t           thread,
+	integer_t          priority,
+	integer_t          max_priority,
+	task_pend_token_t  pend_token)
 {
-	spl_t s;
-
-	assert(thread != THREAD_NULL);
+	thread_mtx_lock(thread);
 
-	if (!thread->active || thread->policy_reset)
+	if (!thread->active || thread->policy_reset) {
+		thread_mtx_unlock(thread);
 		return;
+	}
 
-	s = splsched();
+	spl_t s = splsched();
 	thread_lock(thread);
 
+	__unused
 	integer_t old_max_priority = thread->max_priority;
 
-	thread->task_priority = priority;
-	thread->max_priority = max_priority;
+	assert(priority >= INT16_MIN && priority <= INT16_MAX);
+	thread->task_priority = (int16_t)priority;
 
-	/* A thread is 'throttled' when its max priority is below MAXPRI_THROTTLE */
-	if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
-		sched_set_thread_throttled(thread, FALSE);
-	} else if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
-		sched_set_thread_throttled(thread, TRUE);
+	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
+	thread->max_priority = (int16_t)max_priority;
+
+	/*
+	 * When backgrounding a thread, realtime and fixed priority threads
+	 * should be demoted to timeshare background threads.
+	 *
+	 * TODO: Do this inside the thread policy update routine in order to avoid double
+	 * remove/reinsert for a runnable thread
+	 */
+	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
+		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
+	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
+		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
 	}
 
-	thread_recompute_priority(thread);
+	thread_policy_update_spinlocked(thread, true, pend_token);
 
 	thread_unlock(thread);
 	splx(s);
+
+	thread_mtx_unlock(thread);
 }
 
 /*
@@ -864,28 +1140,22 @@ thread_task_priority(
  */
 void
 thread_policy_reset(
-	thread_t		thread)
+	thread_t                thread)
 {
-	spl_t		s;
+	spl_t           s;
 
 	assert(thread == current_thread());
 
 	s = splsched();
 	thread_lock(thread);
 
-	assert_thread_sched_count(thread);
-
-	if (thread->sched_flags & TH_SFLAG_FAILSAFE)
+	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
 		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
+	}
 
-	assert_thread_sched_count(thread);
-
-	if (thread->sched_flags & TH_SFLAG_THROTTLED)
-		sched_set_thread_throttled(thread, FALSE);
-
-	assert_thread_sched_count(thread);
-
-	assert(thread->BG_COUNT == 0);
+	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
+		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
+	}
 
 	/* At this point, the various demotions should be inactive */
 	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
@@ -899,13 +1169,10 @@ thread_policy_reset(
 
 	thread->importance = 0;
 
-	sched_set_thread_base_priority(thread, thread->task_priority);
-
 	/* Prevent further changes to thread base priority or mode */
 	thread->policy_reset = 1;
 
-	assert(thread->BG_COUNT == 0);
-	assert_thread_sched_count(thread);
+	sched_set_thread_base_priority(thread, thread->task_priority);
 
 	thread_unlock(thread);
 	splx(s);
@@ -913,51 +1180,51 @@ thread_policy_reset(
 
 kern_return_t
 thread_policy_get(
-	thread_t				thread,
-	thread_policy_flavor_t	flavor,
-	thread_policy_t			policy_info,
-	mach_msg_type_number_t	*count,
-	boolean_t				*get_default)
+	thread_t                                thread,
+	thread_policy_flavor_t  flavor,
+	thread_policy_t                 policy_info,
+	mach_msg_type_number_t  *count,
+	boolean_t                               *get_default)
 {
-	kern_return_t			result = KERN_SUCCESS;
-	spl_t					s;
+	kern_return_t                   result = KERN_SUCCESS;
 
-	if (thread == THREAD_NULL)
-		return (KERN_INVALID_ARGUMENT);
+	if (thread == THREAD_NULL) {
+		return KERN_INVALID_ARGUMENT;
+	}
 
 	thread_mtx_lock(thread);
 	if (!thread->active) {
 		thread_mtx_unlock(thread);
 
-		return (KERN_TERMINATED);
+		return KERN_TERMINATED;
 	}
 
 	switch (flavor) {
-
 	case THREAD_EXTENDED_POLICY:
 	{
-		boolean_t		timeshare = TRUE;
+		boolean_t               timeshare = TRUE;
 
 		if (!(*get_default)) {
-			s = splsched();
+			spl_t s = splsched();
 			thread_lock(thread);
 
-			if (	 (thread->sched_mode != TH_MODE_REALTIME)	&&
-					 (thread->saved_mode != TH_MODE_REALTIME)			) {
-				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
+			if ((thread->sched_mode != TH_MODE_REALTIME) &&
+			    (thread->saved_mode != TH_MODE_REALTIME)) {
+				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
 					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
-				else
+				} else {
 					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
-			}
-			else
+				}
+			} else {
 				*get_default = TRUE;
+			}
 
 			thread_unlock(thread);
 			splx(s);
 		}
 
 		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
-			thread_extended_policy_t	info;
+			thread_extended_policy_t        info;
 
 			info = (thread_extended_policy_t)policy_info;
 			info->timeshare = timeshare;
@@ -968,7 +1235,7 @@ thread_policy_get(
 
 	case THREAD_TIME_CONSTRAINT_POLICY:
 	{
-		thread_time_constraint_policy_t		info;
+		thread_time_constraint_policy_t         info;
 
 		if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
@@ -978,18 +1245,18 @@ thread_policy_get(
 		info = (thread_time_constraint_policy_t)policy_info;
 
 		if (!(*get_default)) {
-			s = splsched();
+			spl_t s = splsched();
 			thread_lock(thread);
 
-			if (	(thread->sched_mode == TH_MODE_REALTIME)	||
-					(thread->saved_mode == TH_MODE_REALTIME)		) {
+			if ((thread->sched_mode == TH_MODE_REALTIME) ||
+			    (thread->saved_mode == TH_MODE_REALTIME)) {
 				info->period = thread->realtime.period;
 				info->computation = thread->realtime.computation;
 				info->constraint = thread->realtime.constraint;
 				info->preemptible = thread->realtime.preemptible;
-			}
-			else
+			} else {
 				*get_default = TRUE;
+			}
 
 			thread_unlock(thread);
 			splx(s);
@@ -1002,12 +1269,13 @@ thread_policy_get(
 			info->preemptible = TRUE;
 		}
 
+
 		break;
 	}
 
 	case THREAD_PRECEDENCE_POLICY:
 	{
-		thread_precedence_policy_t		info;
+		thread_precedence_policy_t              info;
 
 		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
@@ -1017,23 +1285,23 @@ thread_policy_get(
 		info = (thread_precedence_policy_t)policy_info;
 
 		if (!(*get_default)) {
-			s = splsched();
+			spl_t s = splsched();
 			thread_lock(thread);
 
 			info->importance = thread->importance;
 
 			thread_unlock(thread);
 			splx(s);
-		}
-		else
+		} else {
 			info->importance = 0;
+		}
 
 		break;
 	}
 
 	case THREAD_AFFINITY_POLICY:
 	{
-		thread_affinity_policy_t		info;
+		thread_affinity_policy_t                info;
 
 		if (!thread_affinity_is_supported()) {
 			result = KERN_NOT_SUPPORTED;
@@ -1046,17 +1314,18 @@ thread_policy_get(
 
 		info = (thread_affinity_policy_t)policy_info;
 
-		if (!(*get_default))
+		if (!(*get_default)) {
 			info->affinity_tag = thread_affinity_get(thread);
-		else
+		} else {
 			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
+		}
 
 		break;
 	}
 
 	case THREAD_POLICY_STATE:
 	{
-		thread_policy_state_t		info;
+		thread_policy_state_t           info;
 
 		if (*count < THREAD_POLICY_STATE_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
@@ -1069,21 +1338,27 @@ thread_policy_get(
 			break;
 		}
 
-		info = (thread_policy_state_t)policy_info;
+		info = (thread_policy_state_t)(void*)policy_info;
 
 		if (!(*get_default)) {
 			info->flags = 0;
 
+			spl_t s = splsched();
+			thread_lock(thread);
+
 			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
 
-			/*
-			 * Unlock the thread mutex and directly return.
-			 * This is necessary because proc_get_thread_policy()
-			 * takes the task lock.
-			 */
-			thread_mtx_unlock(thread);
-			proc_get_thread_policy(thread, info);
-			return (result);
+			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
+			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
+
+			info->thps_user_promotions          = 0;
+			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
+			info->thps_ipc_overrides            = thread->kevent_overrides;
+
+			proc_get_thread_policy_bitfield(thread, info);
+
+			thread_unlock(thread);
+			splx(s);
 		} else {
 			info->requested = 0;
 			info->effective = 0;
@@ -1092,11 +1367,11 @@ thread_policy_get(
 
 		break;
 	}
-	
+
 	case THREAD_LATENCY_QOS_POLICY:
 	{
 		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
-		uint32_t plqos;
+		thread_latency_qos_t plqos;
 
 		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
@@ -1106,7 +1381,7 @@ thread_policy_get(
 		if (*get_default) {
 			plqos = 0;
 		} else {
-			plqos = thread->effective_policy.t_latency_qos;
+			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
 		}
 
 		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
@@ -1116,7 +1391,7 @@ thread_policy_get(
 	case THREAD_THROUGHPUT_QOS_POLICY:
 	{
 		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
-		uint32_t ptqos;
+		thread_throughput_qos_t ptqos;
 
 		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
 			result = KERN_INVALID_ARGUMENT;
@@ -1126,7 +1401,7 @@ thread_policy_get(
 		if (*get_default) {
 			ptqos = 0;
 		} else {
-			ptqos = thread->effective_policy.t_through_qos;
+			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
 		}
 
 		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
@@ -1134,7 +1409,6 @@ thread_policy_get(
 	break;
 
 	case THREAD_QOS_POLICY:
-	case THREAD_QOS_POLICY_OVERRIDE:
 	{
 		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
 
@@ -1144,14 +1418,11 @@ thread_policy_get(
 		}
 
 		if (!(*get_default)) {
-			if (flavor == THREAD_QOS_POLICY_OVERRIDE) {
-				info->qos_tier = thread->requested_policy.thrp_qos_override;
-				/* TODO: handle importance overrides */
-				info->tier_importance = 0;
-			} else {
-				info->qos_tier = thread->requested_policy.thrp_qos;
-				info->tier_importance = thread->importance;
-			}
+			int relprio_value = 0;
+			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
+
+			info->tier_importance = -relprio_value;
 		} else {
 			info->qos_tier = THREAD_QOS_UNSPECIFIED;
 			info->tier_importance = 0;
@@ -1167,44 +1438,1715 @@ thread_policy_get(
 
 	thread_mtx_unlock(thread);
 
-	return (result);
+	return result;
 }
 
-static volatile uint64_t unique_work_interval_id = 1; /* Start at 1, 0 is not a valid work interval ID */
+void
+thread_policy_create(thread_t thread)
+{
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
+	    thread_tid(thread), theffective_0(thread),
+	    theffective_1(thread), thread->base_pri, 0);
+
+	/* We pass a pend token but ignore it */
+	struct task_pend_token pend_token = {};
 
-kern_return_t
-thread_policy_create_work_interval(
-	thread_t		thread,
-	uint64_t		*work_interval_id)
+	thread_policy_update_internal_spinlocked(thread, true, &pend_token);
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
+	    thread_tid(thread), theffective_0(thread),
+	    theffective_1(thread), thread->base_pri, 0);
+}
+
+static void
+thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
 {
-	thread_mtx_lock(thread);
-	if (thread->work_interval_id) {
-		/* already assigned a work interval ID */
-		thread_mtx_unlock(thread);
-		return (KERN_INVALID_VALUE);
-	}
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
+	    thread_tid(thread), theffective_0(thread),
+	    theffective_1(thread), thread->base_pri, 0);
 
-	thread->work_interval_id = OSIncrementAtomic64((volatile int64_t *)&unique_work_interval_id);
-	*work_interval_id = thread->work_interval_id;
+	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
 
-	thread_mtx_unlock(thread);
-	return KERN_SUCCESS;
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
+	    thread_tid(thread), theffective_0(thread),
+	    theffective_1(thread), thread->base_pri, 0);
 }
 
-kern_return_t
-thread_policy_destroy_work_interval(
-	thread_t		thread,
-	uint64_t		work_interval_id)
+
+
+/*
+ * One thread state update function TO RULE THEM ALL
+ *
+ * This function updates the thread effective policy fields
+ * and pushes the results to the relevant subsystems.
+ *
+ * Returns TRUE if a pended action needs to be run.
+ *
+ * Called with thread spinlock locked, task may be locked, thread mutex may be locked
+ */
+static void
+thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
+    task_pend_token_t pend_token)
 {
-	thread_mtx_lock(thread);
-	if (work_interval_id == 0 || thread->work_interval_id == 0 || thread->work_interval_id != work_interval_id) {
-		/* work ID isn't valid or doesn't match previously assigned work interval ID */
-		thread_mtx_unlock(thread);
-		return (KERN_INVALID_ARGUMENT);
+	/*
+	 * Step 1:
+	 *  Gather requested policy and effective task state
+	 */
+
+	struct thread_requested_policy requested = thread->requested_policy;
+	struct task_effective_policy task_effective = thread->task->effective_policy;
+
+	/*
+	 * Step 2:
+	 *  Calculate new effective policies from requested policy, task and thread state
+	 *  Rules:
+	 *      Don't change requested, it won't take effect
+	 */
+
+	struct thread_effective_policy next = {};
+
+	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
+
+	uint32_t next_qos = requested.thrp_qos;
+
+	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
+		next_qos = MAX(requested.thrp_qos_override, next_qos);
+		next_qos = MAX(requested.thrp_qos_promote, next_qos);
+		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
+		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
+		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
 	}
 
-	thread->work_interval_id = 0;
+	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
+	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
+		/*
+		 * This thread is turnstile-boosted higher than the adaptive clamp
+		 * by a synchronous waiter. Allow that to override the adaptive
+		 * clamp temporarily for this thread only.
+		 */
+		next.thep_promote_above_task = true;
+		next_qos = requested.thrp_qos_promote;
+	}
 
-	thread_mtx_unlock(thread);
-	return KERN_SUCCESS;
+	next.thep_qos = next_qos;
+
+	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
+	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
+		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
+			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
+		} else {
+			next.thep_qos = task_effective.tep_qos_clamp;
+		}
+	}
+
+	/*
+	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
+	 * This allows QoS promotions to work properly even after the process is unclamped.
+	 */
+	next.thep_qos_promote = next.thep_qos;
+
+	/* The ceiling only applies to threads that are in the QoS world */
+	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
+	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
+	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
+		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
+	}
+
+	/*
+	 * The QoS relative priority is only applicable when the original programmer's
+	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
+	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
+	 * since otherwise it would be lower than unclamped threads. Similarly, in the
+	 * presence of boosting, the programmer doesn't know what other actors
+	 * are boosting the thread.
+	 */
+	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
+	    (requested.thrp_qos == next.thep_qos) &&
+	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
+		next.thep_qos_relprio = requested.thrp_qos_relprio;
+	} else {
+		next.thep_qos_relprio = 0;
+	}
+
+	/* Calculate DARWIN_BG */
+	bool wants_darwinbg        = false;
+	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */
+
+	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
+		wants_darwinbg = true;
+	}
+
+	/*
+	 * If DARWIN_BG has been requested at either level, it's engaged.
+	 * darwinbg threads always create bg sockets,
+	 * but only some types of darwinbg change the sockets
+	 * after they're created
+	 */
+	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
+		wants_all_sockets_bg = wants_darwinbg = true;
+	}
+
+	if (requested.thrp_pidbind_bg) {
+		wants_all_sockets_bg = wants_darwinbg = true;
+	}
+
+	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
+	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
+		wants_darwinbg = true;
+	}
+
+	/* Calculate side effects of DARWIN_BG */
+
+	if (wants_darwinbg) {
+		next.thep_darwinbg = 1;
+	}
+
+	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
+		next.thep_new_sockets_bg = 1;
+	}
+
+	/* Don't use task_effective.tep_all_sockets_bg here */
+	if (wants_all_sockets_bg) {
+		next.thep_all_sockets_bg = 1;
+	}
+
+	/* darwinbg implies background QOS (or lower) */
+	if (next.thep_darwinbg &&
+	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
+		next.thep_qos = THREAD_QOS_BACKGROUND;
+		next.thep_qos_relprio = 0;
+	}
+
+	/* Calculate IO policy */
+
+	int iopol = THROTTLE_LEVEL_TIER0;
+
+	/* Factor in the task's IO policy */
+	if (next.thep_darwinbg) {
+		iopol = MAX(iopol, task_effective.tep_bg_iotier);
+	}
+
+	if (!next.thep_promote_above_task) {
+		iopol = MAX(iopol, task_effective.tep_io_tier);
+	}
+
+	/* Look up the associated IO tier value for the QoS class */
+	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
+
+	iopol = MAX(iopol, requested.thrp_int_iotier);
+	iopol = MAX(iopol, requested.thrp_ext_iotier);
+
+	next.thep_io_tier = iopol;
+
+	/*
+	 * If a QoS override is causing IO to go into a lower tier, we also set
+	 * the passive bit so that a thread doesn't end up stuck in its own throttle
+	 * window when the override goes away.
+	 */
+
+	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
+	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
+	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
+
+	/* Calculate Passive IO policy */
+	if (requested.thrp_ext_iopassive ||
+	    requested.thrp_int_iopassive ||
+	    qos_io_override_active ||
+	    task_effective.tep_io_passive) {
+		next.thep_io_passive = 1;
+	}
+
+	/* Calculate timer QOS */
+	uint32_t latency_qos = requested.thrp_latency_qos;
+
+	if (!next.thep_promote_above_task) {
+		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
+	}
+
+	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
+
+	next.thep_latency_qos = latency_qos;
+
+	/* Calculate throughput QOS */
+	uint32_t through_qos = requested.thrp_through_qos;
+
+	if (!next.thep_promote_above_task) {
+		through_qos = MAX(through_qos, task_effective.tep_through_qos);
+	}
+
+	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
+
+	next.thep_through_qos = through_qos;
+
+	if (task_effective.tep_terminated || requested.thrp_terminated) {
+		/* Shoot down the throttles that slow down exit or response to SIGTERM */
+		next.thep_terminated    = 1;
+		next.thep_darwinbg      = 0;
+		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
+		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
+		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
+		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
+	}
+
+	/*
+	 * Step 3:
+	 *  Swap out old policy for new policy
+	 */
+
+	struct thread_effective_policy prev = thread->effective_policy;
+
+	thread_update_qos_cpu_time_locked(thread);
+
+	/* This is the point where the new values become visible to other threads */
+	thread->effective_policy = next;
+
+	/*
+	 * Step 4:
+	 *  Pend updates that can't be done while holding the thread lock
+	 */
+
+	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
+		pend_token->tpt_update_sockets = 1;
+	}
+
+	/* TODO: Doesn't this only need to be done if the throttle went up? */
+	if (prev.thep_io_tier != next.thep_io_tier) {
+		pend_token->tpt_update_throttle = 1;
+	}
+
+	/*
+	 * Check for the attributes that sfi_thread_classify() consults,
+	 *  and trigger SFI re-evaluation.
+	 */
+	if (prev.thep_qos != next.thep_qos ||
+	    prev.thep_darwinbg != next.thep_darwinbg) {
+		pend_token->tpt_update_thread_sfi = 1;
+	}
+
+	integer_t old_base_pri = thread->base_pri;
+
+	/*
+	 * Step 5:
+	 *  Update other subsystems as necessary if something has changed
+	 */
+
+	/* Check for the attributes that thread_recompute_priority() consults */
+	if (prev.thep_qos != next.thep_qos ||
+	    prev.thep_qos_relprio != next.thep_qos_relprio ||
+	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
+	    prev.thep_promote_above_task != next.thep_promote_above_task ||
+	    prev.thep_terminated != next.thep_terminated ||
+	    pend_token->tpt_force_recompute_pri == 1 ||
+	    recompute_priority) {
+		thread_recompute_priority(thread);
+	}
+
+	/*
+	 * Check if the thread is waiting on a turnstile and needs priority propagation.
+	 */
+	if (pend_token->tpt_update_turnstile &&
+	    ((old_base_pri == thread->base_pri) ||
+	    !thread_get_waiting_turnstile(thread))) {
+		/*
+		 * Reset update turnstile pend token since either
+		 * the thread priority did not change or thread is
+		 * not blocked on a turnstile.
+		 */
+		pend_token->tpt_update_turnstile = 0;
+	}
+}
+
+
+/*
+ * Initiate a thread policy state transition on a thread with its TID
+ * Useful if you cannot guarantee the thread won't get terminated
+ * Precondition: No locks are held
+ * Will take task lock - using the non-tid variant is faster
+ * if you already have a thread ref.
+ */
+void
+proc_set_thread_policy_with_tid(task_t     task,
+    uint64_t   tid,
+    int        category,
+    int        flavor,
+    int        value)
+{
+	/* takes task lock, returns ref'ed thread or NULL */
+	thread_t thread = task_findtid(task, tid);
+
+	if (thread == THREAD_NULL) {
+		return;
+	}
+
+	proc_set_thread_policy(thread, category, flavor, value);
+
+	thread_deallocate(thread);
+}
+
+/*
+ * Initiate a thread policy transition on a thread
+ * This path supports networking transitions (i.e. darwinbg transitions)
+ * Precondition: No locks are held
+ */
+void
+proc_set_thread_policy(thread_t   thread,
+    int        category,
+    int        flavor,
+    int        value)
+{
+	struct task_pend_token pend_token = {};
+
+	thread_mtx_lock(thread);
+
+	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
+
+	thread_mtx_unlock(thread);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+}
+
+/*
+ * Do the things that can't be done while holding a thread mutex.
+ * These are set up to call back into thread policy to get the latest value,
+ * so they don't have to be synchronized with the update.
+ * The only required semantic is 'call this sometime after updating effective policy'
+ *
+ * Precondition: Thread mutex is not held
+ *
+ * This may be called with the task lock held, but in that case it won't be
+ * called with tpt_update_sockets set.
+ */
+void
+thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
+{
+#ifdef MACH_BSD
+	if (pend_token->tpt_update_sockets) {
+		proc_apply_task_networkbg(thread->task->bsd_info, thread);
+	}
+#endif /* MACH_BSD */
+
+	if (pend_token->tpt_update_throttle) {
+		rethrottle_thread(thread->uthread);
+	}
+
+	if (pend_token->tpt_update_thread_sfi) {
+		sfi_reevaluate(thread);
+	}
+
+	if (pend_token->tpt_update_turnstile) {
+		turnstile_update_thread_priority_chain(thread);
+	}
+}
+
+/*
+ * Set and update thread policy
+ * Thread mutex might be held
+ */
+static void
+proc_set_thread_policy_locked(thread_t          thread,
+    int               category,
+    int               flavor,
+    int               value,
+    int               value2,
+    task_pend_token_t pend_token)
+{
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
+
+	thread_unlock(thread);
+	splx(s);
+}
+
+/*
+ * Set and update thread policy
+ * Thread spinlock is held
+ */
+static void
+proc_set_thread_policy_spinlocked(thread_t          thread,
+    int               category,
+    int               flavor,
+    int               value,
+    int               value2,
+    task_pend_token_t pend_token)
+{
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
+	    thread_tid(thread), threquested_0(thread),
+	    threquested_1(thread), value, 0);
+
+	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
+
+	thread_policy_update_spinlocked(thread, false, pend_token);
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
+	    thread_tid(thread), threquested_0(thread),
+	    threquested_1(thread), tpending(pend_token), 0);
+}
+
+/*
+ * Set the requested state for a specific flavor to a specific value.
+ */
+static void
+thread_set_requested_policy_spinlocked(thread_t     thread,
+    int               category,
+    int               flavor,
+    int               value,
+    int               value2,
+    task_pend_token_t pend_token)
+{
+	int tier, passive;
+
+	struct thread_requested_policy requested = thread->requested_policy;
+
+	switch (flavor) {
+	/* Category: EXTERNAL and INTERNAL, thread and task */
+
+	case TASK_POLICY_DARWIN_BG:
+		if (category == TASK_POLICY_EXTERNAL) {
+			requested.thrp_ext_darwinbg = value;
+		} else {
+			requested.thrp_int_darwinbg = value;
+		}
+		break;
+
+	case TASK_POLICY_IOPOL:
+		proc_iopol_to_tier(value, &tier, &passive);
+		if (category == TASK_POLICY_EXTERNAL) {
+			requested.thrp_ext_iotier  = tier;
+			requested.thrp_ext_iopassive = passive;
+		} else {
+			requested.thrp_int_iotier  = tier;
+			requested.thrp_int_iopassive = passive;
+		}
+		break;
+
+	case TASK_POLICY_IO:
+		if (category == TASK_POLICY_EXTERNAL) {
+			requested.thrp_ext_iotier = value;
+		} else {
+			requested.thrp_int_iotier = value;
+		}
+		break;
+
+	case TASK_POLICY_PASSIVE_IO:
+		if (category == TASK_POLICY_EXTERNAL) {
+			requested.thrp_ext_iopassive = value;
+		} else {
+			requested.thrp_int_iopassive = value;
+		}
+		break;
+
+	/* Category: ATTRIBUTE, thread only */
+
+	case TASK_POLICY_PIDBIND_BG:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_pidbind_bg = value;
+		break;
+
+	case TASK_POLICY_LATENCY_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_latency_qos = value;
+		break;
+
+	case TASK_POLICY_THROUGH_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_through_qos = value;
+		break;
+
+	case TASK_POLICY_QOS_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_qos_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_QOS_AND_RELPRIO:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_qos = value;
+		requested.thrp_qos_relprio = value2;
+		pend_token->tpt_update_turnstile = 1;
+		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
+		break;
+
+	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_qos_workq_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_QOS_PROMOTE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_qos_promote = value;
+		break;
+
+	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_qos_kevent_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_qos_wlsvc_override = value;
+		pend_token->tpt_update_turnstile = 1;
+		break;
+
+	case TASK_POLICY_TERMINATED:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		requested.thrp_terminated = value;
+		break;
+
+	default:
+		panic("unknown task policy: %d %d %d", category, flavor, value);
+		break;
+	}
+
+	thread->requested_policy = requested;
+}
+
+/*
+ * Gets what you set. Effective values may be different.
+ * Precondition: No locks are held
+ */
+int
+proc_get_thread_policy(thread_t   thread,
+    int        category,
+    int        flavor)
+{
+	int value = 0;
+	thread_mtx_lock(thread);
+	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
+	thread_mtx_unlock(thread);
+	return value;
+}
+
+static int
+proc_get_thread_policy_locked(thread_t   thread,
+    int        category,
+    int        flavor,
+    int*       value2)
+{
+	int value = 0;
+
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
+
+	thread_unlock(thread);
+	splx(s);
+
+	return value;
+}
+
+/*
+ * Gets what you set. Effective values may be different.
+ */
+static int
+thread_get_requested_policy_spinlocked(thread_t thread,
+    int      category,
+    int      flavor,
+    int*     value2)
+{
+	int value = 0;
+
+	struct thread_requested_policy requested = thread->requested_policy;
+
+	switch (flavor) {
+	case TASK_POLICY_DARWIN_BG:
+		if (category == TASK_POLICY_EXTERNAL) {
+			value = requested.thrp_ext_darwinbg;
+		} else {
+			value = requested.thrp_int_darwinbg;
+		}
+		break;
+	case TASK_POLICY_IOPOL:
+		if (category == TASK_POLICY_EXTERNAL) {
+			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
+			    requested.thrp_ext_iopassive);
+		} else {
+			value = proc_tier_to_iopol(requested.thrp_int_iotier,
+			    requested.thrp_int_iopassive);
+		}
+		break;
+	case TASK_POLICY_IO:
+		if (category == TASK_POLICY_EXTERNAL) {
+			value = requested.thrp_ext_iotier;
+		} else {
+			value = requested.thrp_int_iotier;
+		}
+		break;
+	case TASK_POLICY_PASSIVE_IO:
+		if (category == TASK_POLICY_EXTERNAL) {
+			value = requested.thrp_ext_iopassive;
+		} else {
+			value = requested.thrp_int_iopassive;
+		}
+		break;
+	case TASK_POLICY_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_qos;
+		break;
+	case TASK_POLICY_QOS_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_qos_override;
+		break;
+	case TASK_POLICY_LATENCY_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_latency_qos;
+		break;
+	case TASK_POLICY_THROUGH_QOS:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_through_qos;
+		break;
+	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_qos_workq_override;
+		break;
+	case TASK_POLICY_QOS_AND_RELPRIO:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		assert(value2 != NULL);
+		value = requested.thrp_qos;
+		*value2 = requested.thrp_qos_relprio;
+		break;
+	case TASK_POLICY_QOS_PROMOTE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_qos_promote;
+		break;
+	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_qos_kevent_override;
+		break;
+	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_qos_wlsvc_override;
+		break;
+	case TASK_POLICY_TERMINATED:
+		assert(category == TASK_POLICY_ATTRIBUTE);
+		value = requested.thrp_terminated;
+		break;
+
+	default:
+		panic("unknown policy_flavor %d", flavor);
+		break;
+	}
+
+	return value;
+}
+
+/*
+ * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
+ *
+ * NOTE: This accessor does not take the task or thread lock.
+ * Notifications of state updates need to be externally synchronized with state queries.
+ * This routine *MUST* remain interrupt safe, as it is potentially invoked
+ * within the context of a timer interrupt.
+ *
+ * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
+ *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
+ *      I don't think that cost is worth not having the right answer.
+ */
+int
+proc_get_effective_thread_policy(thread_t thread,
+    int      flavor)
+{
+	int value = 0;
+
+	switch (flavor) {
+	case TASK_POLICY_DARWIN_BG:
+		/*
+		 * This call is used within the timer layer, as well as
+		 * prioritizing requests to the graphics system.
+		 * It also informs SFI and originator-bg-state.
+		 * Returns 1 for background mode, 0 for normal mode
+		 */
+
+		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
+		break;
+	case TASK_POLICY_IO:
+		/*
+		 * The I/O system calls here to find out what throttling tier to apply to an operation.
+		 * Returns THROTTLE_LEVEL_* values
+		 */
+		value = thread->effective_policy.thep_io_tier;
+		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
+			value = MIN(value, thread->iotier_override);
+		}
+		break;
+	case TASK_POLICY_PASSIVE_IO:
+		/*
+		 * The I/O system calls here to find out whether an operation should be passive.
+		 * (i.e. not cause operations with lower throttle tiers to be throttled)
+		 * Returns 1 for passive mode, 0 for normal mode
+		 *
+		 * If an override is causing IO to go into a lower tier, we also set
+		 * the passive bit so that a thread doesn't end up stuck in its own throttle
+		 * window when the override goes away.
+		 */
+		value = thread->effective_policy.thep_io_passive ? 1 : 0;
+		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
+		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
+			value = 1;
+		}
+		break;
+	case TASK_POLICY_ALL_SOCKETS_BG:
+		/*
+		 * do_background_socket() calls this to determine whether
+		 * it should change the thread's sockets
+		 * Returns 1 for background mode, 0 for normal mode
+		 * This consults both thread and task so un-DBGing a thread while the task is BG
+		 * doesn't get you out of the network throttle.
+		 */
+		value = (thread->effective_policy.thep_all_sockets_bg ||
+		    thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
+		break;
+	case TASK_POLICY_NEW_SOCKETS_BG:
+		/*
+		 * socreate() calls this to determine if it should mark a new socket as background
+		 * Returns 1 for background mode, 0 for normal mode
+		 */
+		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
+		break;
+	case TASK_POLICY_LATENCY_QOS:
+		/*
+		 * timer arming calls into here to find out the timer coalescing level
+		 * Returns a latency QoS tier (0-6)
+		 */
+		value = thread->effective_policy.thep_latency_qos;
+		break;
+	case TASK_POLICY_THROUGH_QOS:
+		/*
+		 * This value is passed into the urgency callout from the scheduler
+		 * to the performance management subsystem.
+		 *
+		 * Returns a throughput QoS tier (0-6)
+		 */
+		value = thread->effective_policy.thep_through_qos;
+		break;
+	case TASK_POLICY_QOS:
+		/*
+		 * This is communicated to the performance management layer and SFI.
+		 *
+		 * Returns a QoS policy tier
+		 */
+		value = thread->effective_policy.thep_qos;
+		break;
+	default:
+		panic("unknown thread policy flavor %d", flavor);
+		break;
+	}
+
+	return value;
+}
+
+
+/*
+ * (integer_t) casts limit the number of bits we can fit here
+ * this interface is deprecated and replaced by the _EXT struct ?
+ */
+static void
+proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
+{
+	uint64_t bits = 0;
+	struct thread_requested_policy requested = thread->requested_policy;
+
+	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
+	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
+	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
+	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
+	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
+	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
+
+	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
+	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
+
+	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
+
+	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
+	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
+
+	info->requested = (integer_t) bits;
+	bits = 0;
+
+	struct thread_effective_policy effective = thread->effective_policy;
+
+	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
+
+	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
+	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
+	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
+	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
+
+	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
+
+	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
+	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
+
+	info->effective = (integer_t)bits;
+	bits = 0;
+
+	info->pending = 0;
+}
+
+/*
+ * Sneakily trace either the task and thread requested
+ * or just the thread requested, depending on if we have enough room.
+ * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
+ *
+ *                                LP32            LP64
+ * threquested_0(thread)          thread[0]       task[0]
+ * threquested_1(thread)          thread[1]       thread[0]
+ *
+ */
+
+uintptr_t
+threquested_0(thread_t thread)
+{
+	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
+
+	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
+
+	return raw[0];
+}
+
+uintptr_t
+threquested_1(thread_t thread)
+{
+#if defined __LP64__
+	return *(uintptr_t*)&thread->task->requested_policy;
+#else
+	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
+	return raw[1];
+#endif
+}
+
+uintptr_t
+theffective_0(thread_t thread)
+{
+	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
+
+	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
+	return raw[0];
+}
+
+uintptr_t
+theffective_1(thread_t thread)
+{
+#if defined __LP64__
+	return *(uintptr_t*)&thread->task->effective_policy;
+#else
+	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
+	return raw[1];
+#endif
+}
+
+
+/*
+ * Set an override on the thread which is consulted with a
+ * higher priority than the task/thread policy. This should
+ * only be set for temporary grants until the thread
+ * returns to the userspace boundary
+ *
+ * We use atomic operations to swap in the override, with
+ * the assumption that the thread itself can
+ * read the override and clear it on return to userspace.
+ *
+ * No locking is performed, since it is acceptable to see
+ * a stale override for one loop through throttle_lowpri_io().
+ * However a thread reference must be held on the thread.
+ */
+
+void
+set_thread_iotier_override(thread_t thread, int policy)
+{
+	int current_override;
+
+	/* Let most aggressive I/O policy win until user boundary */
+	do {
+		current_override = thread->iotier_override;
+
+		if (current_override != THROTTLE_LEVEL_NONE) {
+			policy = MIN(current_override, policy);
+		}
+
+		if (current_override == policy) {
+			/* no effective change */
+			return;
+		}
+	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
+
+	/*
+	 * Since the thread may be currently throttled,
+	 * re-evaluate tiers and potentially break out
+	 * of an msleep
+	 */
+	rethrottle_thread(thread->uthread);
+}
+
+/*
+ * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
+ * semaphores, dispatch_sync) may result in priority inversions where a higher priority
+ * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
+ * priority thread. In these cases, we attempt to propagate the priority token, as long
+ * as the subsystem informs us of the relationships between the threads. The userspace
+ * synchronization subsystem should maintain the information of owner->resource and
+ * resource->waiters itself.
+ */
+
+/*
+ * This helper canonicalizes the resource/resource_type given the current qos_override_mode
+ * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
+ * to be handled specially in the future, but for now it's fine to slam
+ * *resource to USER_ADDR_NULL even if it was previously a wildcard.
+ */
+static void
+canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
+{
+	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
+		/* Map all input resource/type to a single one */
+		*resource = USER_ADDR_NULL;
+		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
+	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
+		/* no transform */
+	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
+		/* Map all mutex overrides to a single one, to avoid memory overhead */
+		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
+			*resource = USER_ADDR_NULL;
+		}
+	}
+}
+
+/* This helper routine finds an existing override if known. Locking should be done by caller */
+static struct thread_qos_override *
+find_qos_override(thread_t thread,
+    user_addr_t resource,
+    int resource_type)
+{
+	struct thread_qos_override *override;
+
+	override = thread->overrides;
+	while (override) {
+		if (override->override_resource == resource &&
+		    override->override_resource_type == resource_type) {
+			return override;
+		}
+
+		override = override->override_next;
+	}
+
+	return NULL;
+}
+
+static void
+find_and_decrement_qos_override(thread_t       thread,
+    user_addr_t    resource,
+    int            resource_type,
+    boolean_t      reset,
+    struct thread_qos_override **free_override_list)
+{
+	struct thread_qos_override *override, *override_prev;
+
+	override_prev = NULL;
+	override = thread->overrides;
+	while (override) {
+		struct thread_qos_override *override_next = override->override_next;
+
+		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
+		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
+			if (reset) {
+				override->override_contended_resource_count = 0;
+			} else {
+				override->override_contended_resource_count--;
+			}
+
+			if (override->override_contended_resource_count == 0) {
+				if (override_prev == NULL) {
+					thread->overrides = override_next;
+				} else {
+					override_prev->override_next = override_next;
+				}
+
+				/* Add to out-param for later zfree */
+				override->override_next = *free_override_list;
+				*free_override_list = override;
+			} else {
+				override_prev = override;
+			}
+
+			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
+				return;
+			}
+		} else {
+			override_prev = override;
+		}
+
+		override = override_next;
+	}
+}
+
+/* This helper recalculates the current requested override using the policy selected at boot */
+static int
+calculate_requested_qos_override(thread_t thread)
+{
+	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
+		return THREAD_QOS_UNSPECIFIED;
+	}
+
+	/* iterate over all overrides and calculate MAX */
+	struct thread_qos_override *override;
+	int qos_override = THREAD_QOS_UNSPECIFIED;
+
+	override = thread->overrides;
+	while (override) {
+		qos_override = MAX(qos_override, override->override_qos);
+		override = override->override_next;
+	}
+
+	return qos_override;
+}
+
+/*
+ * Returns:
+ * - 0 on success
+ * - EINVAL if some invalid input was passed
+ */
+static int
+proc_thread_qos_add_override_internal(thread_t         thread,
+    int              override_qos,
+    boolean_t        first_override_for_resource,
+    user_addr_t      resource,
+    int              resource_type)
+{
+	struct task_pend_token pend_token = {};
+	int rc = 0;
+
+	thread_mtx_lock(thread);
+
+	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
+	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
+
+	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
+	    uint64_t, thread->requested_policy.thrp_qos,
+	    uint64_t, thread->effective_policy.thep_qos,
+	    int, override_qos, boolean_t, first_override_for_resource);
+
+	struct thread_qos_override *override;
+	struct thread_qos_override *override_new = NULL;
+	int new_qos_override, prev_qos_override;
+	int new_effective_qos;
+
+	canonicalize_resource_and_type(&resource, &resource_type);
+
+	override = find_qos_override(thread, resource, resource_type);
+	if (first_override_for_resource && !override) {
+		/* We need to allocate a new object. Drop the thread lock and
+		 * recheck afterwards in case someone else added the override
+		 */
+		thread_mtx_unlock(thread);
+		override_new = zalloc(thread_qos_override_zone);
+		thread_mtx_lock(thread);
+		override = find_qos_override(thread, resource, resource_type);
+	}
+	if (first_override_for_resource && override) {
+		/* Someone else already allocated while the thread lock was dropped */
+		override->override_contended_resource_count++;
+	} else if (!override && override_new) {
+		override = override_new;
+		override_new = NULL;
+		override->override_next = thread->overrides;
+		/* since first_override_for_resource was TRUE */
+		override->override_contended_resource_count = 1;
+		override->override_resource = resource;
+		override->override_resource_type = (int16_t)resource_type;
+		override->override_qos = THREAD_QOS_UNSPECIFIED;
+		thread->overrides = override;
+	}
+
+	if (override) {
+		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
+			override->override_qos = (int16_t)override_qos;
+		} else {
+			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
+		}
+	}
+
+	/* Determine how to combine the various overrides into a single current
+	 * requested override
+	 */
+	new_qos_override = calculate_requested_qos_override(thread);
+
+	prev_qos_override = proc_get_thread_policy_locked(thread,
+	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
+
+	if (new_qos_override != prev_qos_override) {
+		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+		    TASK_POLICY_QOS_OVERRIDE,
+		    new_qos_override, 0, &pend_token);
+	}
+
+	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
+
+	thread_mtx_unlock(thread);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+
+	if (override_new) {
+		zfree(thread_qos_override_zone, override_new);
+	}
+
+	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
+	    int, new_qos_override, int, new_effective_qos, int, rc);
+
+	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
+	    new_qos_override, resource, resource_type, 0, 0);
+
+	return rc;
+}
+
+int
+proc_thread_qos_add_override(task_t           task,
+    thread_t         thread,
+    uint64_t         tid,
+    int              override_qos,
+    boolean_t        first_override_for_resource,
+    user_addr_t      resource,
+    int              resource_type)
+{
+	boolean_t has_thread_reference = FALSE;
+	int rc = 0;
+
+	if (thread == THREAD_NULL) {
+		thread = task_findtid(task, tid);
+		/* returns referenced thread */
+
+		if (thread == THREAD_NULL) {
+			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
+			    tid, 0, 0xdead, 0, 0);
+			return ESRCH;
+		}
+		has_thread_reference = TRUE;
+	} else {
+		assert(thread->task == task);
+	}
+	rc = proc_thread_qos_add_override_internal(thread, override_qos,
+	    first_override_for_resource, resource, resource_type);
+	if (has_thread_reference) {
+		thread_deallocate(thread);
+	}
+
+	return rc;
+}
+
+static void
+proc_thread_qos_remove_override_internal(thread_t       thread,
+    user_addr_t    resource,
+    int            resource_type,
+    boolean_t      reset)
+{
+	struct task_pend_token pend_token = {};
+
+	struct thread_qos_override *deferred_free_override_list = NULL;
+	int new_qos_override, prev_qos_override, new_effective_qos;
+
+	thread_mtx_lock(thread);
+
+	canonicalize_resource_and_type(&resource, &resource_type);
+
+	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
+
+	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
+	    thread_tid(thread), resource, reset, 0, 0);
+
+	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
+	    uint64_t, thread->requested_policy.thrp_qos,
+	    uint64_t, thread->effective_policy.thep_qos);
+
+	/* Determine how to combine the various overrides into a single current requested override */
+	new_qos_override = calculate_requested_qos_override(thread);
+
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	/*
+	 * The override chain and therefore the value of the current override is locked with thread mutex,
+	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
+	 * This means you can't change the current override from a spinlock-only setter.
+	 */
+	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
+
+	if (new_qos_override != prev_qos_override) {
+		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
+	}
+
+	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
+
+	thread_unlock(thread);
+	splx(s);
+
+	thread_mtx_unlock(thread);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+
+	while (deferred_free_override_list) {
+		struct thread_qos_override *override_next = deferred_free_override_list->override_next;
+
+		zfree(thread_qos_override_zone, deferred_free_override_list);
+		deferred_free_override_list = override_next;
+	}
+
+	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
+	    int, new_qos_override, int, new_effective_qos);
+
+	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
+	    thread_tid(thread), 0, 0, 0, 0);
+}
+
+int
+proc_thread_qos_remove_override(task_t      task,
+    thread_t    thread,
+    uint64_t    tid,
+    user_addr_t resource,
+    int         resource_type)
+{
+	boolean_t has_thread_reference = FALSE;
+
+	if (thread == THREAD_NULL) {
+		thread = task_findtid(task, tid);
+		/* returns referenced thread */
+
+		if (thread == THREAD_NULL) {
+			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
+			    tid, 0, 0xdead, 0, 0);
+			return ESRCH;
+		}
+		has_thread_reference = TRUE;
+	} else {
+		assert(task == thread->task);
+	}
+
+	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
+
+	if (has_thread_reference) {
+		thread_deallocate(thread);
+	}
+
+	return 0;
+}
+
+/* Deallocate before thread termination */
+void
+proc_thread_qos_deallocate(thread_t thread)
+{
+	/* This thread must have no more IPC overrides. */
+	assert(thread->kevent_overrides == 0);
+	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
+	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
+
+	/*
+	 * Clear out any lingering override objects.
+	 */
+	struct thread_qos_override *override;
+
+	thread_mtx_lock(thread);
+	override = thread->overrides;
+	thread->overrides = NULL;
+	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
+	/* We don't need to re-evaluate thread policy here because the thread has already exited */
+	thread_mtx_unlock(thread);
+
+	while (override) {
+		struct thread_qos_override *override_next = override->override_next;
+
+		zfree(thread_qos_override_zone, override);
+		override = override_next;
+	}
+}
+
+/*
+ * Set up the primordial thread's QoS
+ */
+void
+task_set_main_thread_qos(task_t task, thread_t thread)
+{
+	struct task_pend_token pend_token = {};
+
+	assert(thread->task == task);
+
+	thread_mtx_lock(thread);
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
+	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
+	    thread->requested_policy.thrp_qos, 0);
+
+	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
+
+	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
+	    primordial_qos, 0, &pend_token);
+
+	thread_mtx_unlock(thread);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
+	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
+	    primordial_qos, 0);
+}
+
+/*
+ * KPI for pthread kext
+ *
+ * Return a good guess at what the initial manager QoS will be
+ * Dispatch can override this in userspace if it so chooses
+ */
+thread_qos_t
+task_get_default_manager_qos(task_t task)
+{
+	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
+
+	if (primordial_qos == THREAD_QOS_LEGACY) {
+		primordial_qos = THREAD_QOS_USER_INITIATED;
+	}
+
+	return primordial_qos;
+}
+
+/*
+ * Check if the kernel promotion on thread has changed
+ * and apply it.
+ *
+ * thread locked on entry and exit
+ */
+boolean_t
+thread_recompute_kernel_promotion_locked(thread_t thread)
+{
+	boolean_t needs_update = FALSE;
+	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
+
+	/*
+	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
+	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
+	 * and propagates the priority through the chain with the same cap, because as of now it does
+	 * not differenciate on the kernel primitive.
+	 *
+	 * If this assumption will change with the adoption of a kernel primitive that does not
+	 * cap the when adding/propagating,
+	 * then here is the place to put the generic cap for all kernel primitives
+	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
+	 */
+	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
+
+	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
+		KDBG(MACHDBG_CODE(
+			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
+		    thread_tid(thread),
+		    kern_promotion_schedpri,
+		    thread->kern_promotion_schedpri);
+
+		needs_update = TRUE;
+		thread->kern_promotion_schedpri = kern_promotion_schedpri;
+		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
+	}
+
+	return needs_update;
+}
+
+/*
+ * Check if the user promotion on thread has changed
+ * and apply it.
+ *
+ * thread locked on entry, might drop the thread lock
+ * and reacquire it.
+ */
+boolean_t
+thread_recompute_user_promotion_locked(thread_t thread)
+{
+	boolean_t needs_update = FALSE;
+	struct task_pend_token pend_token = {};
+	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
+	int old_base_pri = thread->base_pri;
+	thread_qos_t qos_promotion;
+
+	/* Check if user promotion has changed */
+	if (thread->user_promotion_basepri == user_promotion_basepri) {
+		return needs_update;
+	} else {
+		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
+		    thread_tid(thread),
+		    user_promotion_basepri,
+		    thread->user_promotion_basepri,
+		    0, 0);
+		KDBG(MACHDBG_CODE(
+			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
+		    thread_tid(thread),
+		    user_promotion_basepri,
+		    thread->user_promotion_basepri);
+	}
+
+	/* Update the user promotion base pri */
+	thread->user_promotion_basepri = user_promotion_basepri;
+	pend_token.tpt_force_recompute_pri = 1;
+
+	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
+		qos_promotion = THREAD_QOS_UNSPECIFIED;
+	} else {
+		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
+	}
+
+	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
+
+	if (thread_get_waiting_turnstile(thread) &&
+	    thread->base_pri != old_base_pri) {
+		needs_update = TRUE;
+	}
+
+	thread_unlock(thread);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+
+	thread_lock(thread);
+
+	return needs_update;
+}
+
+/*
+ * Convert the thread user promotion base pri to qos for threads in qos world.
+ * For priority above UI qos, the qos would be set to UI.
+ */
+thread_qos_t
+thread_user_promotion_qos_for_pri(int priority)
+{
+	thread_qos_t qos;
+	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
+		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
+			return qos;
+		}
+	}
+	return THREAD_QOS_MAINTENANCE;
+}
+
+/*
+ * Set the thread's QoS Kevent override
+ * Owned by the Kevent subsystem
+ *
+ * May be called with spinlocks held, but not spinlocks
+ * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
+ *
+ * One 'add' must be balanced by one 'drop'.
+ * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
+ * Before the thread is deallocated, there must be 0 remaining overrides.
+ */
+static void
+thread_kevent_override(thread_t    thread,
+    uint32_t    qos_override,
+    boolean_t   is_new_override)
+{
+	struct task_pend_token pend_token = {};
+	boolean_t needs_update;
+
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
+
+	assert(qos_override > THREAD_QOS_UNSPECIFIED);
+	assert(qos_override < THREAD_QOS_LAST);
+
+	if (is_new_override) {
+		if (thread->kevent_overrides++ == 0) {
+			/* This add is the first override for this thread */
+			assert(old_override == THREAD_QOS_UNSPECIFIED);
+		} else {
+			/* There are already other overrides in effect for this thread */
+			assert(old_override > THREAD_QOS_UNSPECIFIED);
+		}
+	} else {
+		/* There must be at least one override (the previous add call) in effect */
+		assert(thread->kevent_overrides > 0);
+		assert(old_override > THREAD_QOS_UNSPECIFIED);
+	}
+
+	/*
+	 * We can't allow lowering if there are several IPC overrides because
+	 * the caller can't possibly know the whole truth
+	 */
+	if (thread->kevent_overrides == 1) {
+		needs_update = qos_override != old_override;
+	} else {
+		needs_update = qos_override > old_override;
+	}
+
+	if (needs_update) {
+		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
+		    qos_override, 0, &pend_token);
+		assert(pend_token.tpt_update_sockets == 0);
+	}
+
+	thread_unlock(thread);
+	splx(s);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+}
+
+void
+thread_add_kevent_override(thread_t thread, uint32_t qos_override)
+{
+	thread_kevent_override(thread, qos_override, TRUE);
+}
+
+void
+thread_update_kevent_override(thread_t thread, uint32_t qos_override)
+{
+	thread_kevent_override(thread, qos_override, FALSE);
+}
+
+void
+thread_drop_kevent_override(thread_t thread)
+{
+	struct task_pend_token pend_token = {};
+
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	assert(thread->kevent_overrides > 0);
+
+	if (--thread->kevent_overrides == 0) {
+		/*
+		 * There are no more overrides for this thread, so we should
+		 * clear out the saturated override value
+		 */
+
+		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
+		    0, &pend_token);
+	}
+
+	thread_unlock(thread);
+	splx(s);
+
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+}
+
+/*
+ * Set the thread's QoS Workloop Servicer override
+ * Owned by the Kevent subsystem
+ *
+ * May be called with spinlocks held, but not spinlocks
+ * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
+ *
+ * One 'add' must be balanced by one 'drop'.
+ * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
+ * Before the thread is deallocated, there must be 0 remaining overrides.
+ */
+static void
+thread_servicer_override(thread_t    thread,
+    uint32_t    qos_override,
+    boolean_t   is_new_override)
+{
+	struct task_pend_token pend_token = {};
+
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	if (is_new_override) {
+		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
+	} else {
+		assert(thread->requested_policy.thrp_qos_wlsvc_override);
+	}
+
+	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
+	    qos_override, 0, &pend_token);
+
+	thread_unlock(thread);
+	splx(s);
+
+	assert(pend_token.tpt_update_sockets == 0);
+	thread_policy_update_complete_unlocked(thread, &pend_token);
+}
+
+void
+thread_add_servicer_override(thread_t thread, uint32_t qos_override)
+{
+	assert(qos_override > THREAD_QOS_UNSPECIFIED);
+	assert(qos_override < THREAD_QOS_LAST);
+
+	thread_servicer_override(thread, qos_override, TRUE);
+}
+
+void
+thread_update_servicer_override(thread_t thread, uint32_t qos_override)
+{
+	assert(qos_override > THREAD_QOS_UNSPECIFIED);
+	assert(qos_override < THREAD_QOS_LAST);
+
+	thread_servicer_override(thread, qos_override, FALSE);
+}
+
+void
+thread_drop_servicer_override(thread_t thread)
+{
+	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
+}
+
+
+/* Get current requested qos / relpri, may be called from spinlock context */
+thread_qos_t
+thread_get_requested_qos(thread_t thread, int *relpri)
+{
+	int relprio_value = 0;
+	thread_qos_t qos;
+
+	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
+	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
+	if (relpri) {
+		*relpri = -relprio_value;
+	}
+	return qos;
+}
+
+/*
+ * This function will promote the thread priority
+ * since exec could block other threads calling
+ * proc_find on the proc. This boost must be removed
+ * via call to thread_clear_exec_promotion.
+ *
+ * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
+ */
+void
+thread_set_exec_promotion(thread_t thread)
+{
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
+
+	thread_unlock(thread);
+	splx(s);
+}
+
+/*
+ * This function will clear the exec thread
+ * promotion set on the thread by thread_set_exec_promotion.
+ */
+void
+thread_clear_exec_promotion(thread_t thread)
+{
+	spl_t s = splsched();
+	thread_lock(thread);
+
+	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
+
+	thread_unlock(thread);
+	splx(s);
 }