diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c
index 8c70db47d723c860f4a99f9b6a3fed308a92dbca..2b2a98d68ff03b5ce00aa21fcbbcb4c2f0e2c3b6 100644
--- a/osfmk/kern/sched_prim.c
+++ b/osfmk/kern/sched_prim.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -82,6 +82,7 @@
 #endif
 
 #include <kern/kern_types.h>
+#include <kern/backtrace.h>
 #include <kern/clock.h>
 #include <kern/counters.h>
 #include <kern/cpu_number.h>
 #include <kern/ledger.h>
 #include <kern/timer_queue.h>
 #include <kern/waitq.h>
+#include <kern/policy_internal.h>
 
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <mach/sdt.h>
 
 #include <sys/kdebug.h>
+#include <kperf/kperf.h>
+#include <kern/kpc.h>
 
 #include <kern/pms.h>
 
-#if defined(CONFIG_TELEMETRY) && defined(CONFIG_SCHED_TIMESHARE_CORE)
-#include <kern/telemetry.h>
-#endif
-
 struct rt_queue        rt_runq;
 
 uintptr_t sched_thread_on_rt_queue = (uintptr_t)0xDEAFBEE0;
@@ -175,15 +175,9 @@ uint32_t   min_rt_quantum;
 
 unsigned       sched_tick;
 uint32_t       sched_tick_interval;
-#if defined(CONFIG_TELEMETRY)
-uint32_t       sched_telemetry_interval;
-#endif /* CONFIG_TELEMETRY */
 
-uint32_t       sched_pri_shift = INT8_MAX;
-uint32_t       sched_background_pri_shift = INT8_MAX;
-uint32_t       sched_combined_fgbg_pri_shift = INT8_MAX;
+uint32_t       sched_pri_shifts[TH_BUCKET_MAX];
 uint32_t       sched_fixed_shift;
-uint32_t       sched_use_combined_fgbg_decay = 0;
 
 uint32_t       sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */
 
@@ -207,9 +201,6 @@ thread_t sched_maintenance_thread;
 
 uint64_t       sched_one_second_interval;
 
-uint32_t       sched_run_count, sched_share_count, sched_background_count;
-uint32_t       sched_load_average, sched_mach_factor;
-
 /* Forwards */
 
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
@@ -270,7 +261,7 @@ sched_vm_group_maintenance(void);
 
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 int8_t         sched_load_shifts[NRQS];
-int            sched_preempt_pri[NRQBM];
+bitmap_t       sched_preempt_pri[BITMAP_LEN(NRQS)];
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
 const struct sched_dispatch_table *sched_current_dispatch = NULL;
@@ -465,21 +456,17 @@ sched_timeshare_timebase_init(void)
                abstime >>= 1;
        sched_fixed_shift = shift;
 
+       for (uint32_t i = 0 ; i < TH_BUCKET_MAX ; i++)
+               sched_pri_shifts[i] = INT8_MAX;
+
        max_unsafe_computation = ((uint64_t)max_unsafe_quanta) * std_quantum;
        sched_safe_duration = 2 * ((uint64_t)max_unsafe_quanta) * std_quantum;
-       
+
        max_poll_computation = ((uint64_t)max_poll_quanta) * std_quantum;
        thread_depress_time = 1 * std_quantum;
        default_timeshare_computation = std_quantum / 2;
        default_timeshare_constraint = std_quantum;
 
-#if defined(CONFIG_TELEMETRY)
-       /* interval for high frequency telemetry */
-       clock_interval_to_absolutetime_interval(10, NSEC_PER_MSEC, &abstime);
-       assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
-       sched_telemetry_interval = (uint32_t)abstime;
-#endif
-
 }
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
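The hunks above replace the separate sched_pri_shift / sched_background_pri_shift / sched_combined_fgbg_pri_shift globals with one per-bucket array, each slot preset to INT8_MAX until load statistics exist. A minimal standalone sketch of that shape (the bucket names mirror TH_BUCKET_*, but pri_shifts, pri_shifts_init, and the usage >> shift consumer are illustrative assumptions, not code from this file):

#include <stdint.h>

/* Hypothetical bucket indices standing in for the TH_BUCKET_* values in the diff. */
enum { BUCKET_SHARE_FG, BUCKET_SHARE_UT, BUCKET_SHARE_BG, BUCKET_MAX };

static uint32_t pri_shifts[BUCKET_MAX];

/* INT8_MAX acts as the "no load yet, no decay" sentinel, as in the diff. */
static void pri_shifts_init(void)
{
    for (uint32_t i = 0; i < BUCKET_MAX; i++)
        pri_shifts[i] = INT8_MAX;
}

/* Assumed consumer: priority penalty for recent CPU usage, per bucket. */
static uint32_t usage_penalty(uint32_t usage, int bucket)
{
    uint32_t shift = pri_shifts[bucket];
    return (shift >= 31) ? 0 : (usage >> shift);
}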
@@ -533,10 +520,6 @@ load_shift_init(void)
                kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor);
        }
 
-       if (PE_parse_boot_argn("sched_use_combined_fgbg_decay", &sched_use_combined_fgbg_decay, sizeof (sched_use_combined_fgbg_decay))) {
-               kprintf("Overriding schedule fg/bg decay calculation: %u\n", sched_use_combined_fgbg_decay);
-       }
-
        if (sched_decay_penalty == 0) {
                /*
                 * There is no penalty for timeshare threads for using too much
@@ -569,13 +552,13 @@ load_shift_init(void)
 static void
 preempt_pri_init(void)
 {
-       int             i, *p = sched_preempt_pri;
+       bitmap_t *p = sched_preempt_pri;
 
-       for (i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i)
-               setbit(i, p);
+       for (int i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i)
+               bitmap_set(p, i);
 
-       for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
-               setbit(i, p);
+       for (int i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
+               bitmap_set(p, i);
 }
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
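preempt_pri_init moves from an int array driven by setbit() (indexed as MAXPRI - pri elsewhere) onto the generic bitmap_t helpers, and priority_is_urgent later tests the same map with bitmap_test. A plain-C sketch of the underlying technique, not the xnu bitmap API (NPRI, bmap_set, bmap_test, preempt_band_set are made-up names):

#include <stdbool.h>
#include <stdint.h>

#define NPRI        128                  /* assumed priority count (NRQS in the diff) */
#define BMAP_LEN(n) (((n) + 63) / 64)

static uint64_t preempt_map[BMAP_LEN(NPRI)];

static void bmap_set(uint64_t *map, int bit)
{
    map[bit / 64] |= 1ULL << (bit % 64);
}

/* Direct test by priority -- no more MAXPRI - pri index flipping. */
static bool bmap_test(const uint64_t *map, int bit)
{
    return (map[bit / 64] >> (bit % 64)) & 1;
}

/* Mark a half-open priority band [lo, hi) as urgent enough to preempt. */
static void preempt_band_set(int lo, int hi)
{
    for (int i = lo; i < hi; i++)
        bmap_set(preempt_map, i);
}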
@@ -591,6 +574,8 @@ thread_timer_expire(
        thread_t                thread = p0;
        spl_t                   s;
 
+       assert_thread_magic(thread);
+
        s = splsched();
        thread_lock(thread);
        if (--thread->wait_timer_active == 0) {
@@ -651,19 +636,12 @@ thread_unblock(
 
                (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
 
-               /*
-                *      Update run counts.
-                */
+               /* Update the runnable thread count */
                new_run_count = sched_run_incr(thread);
-               if (thread->sched_mode == TH_MODE_TIMESHARE) {
-                       sched_share_incr(thread);
-
-                       if (thread->sched_flags & TH_SFLAG_THROTTLED)
-                               sched_background_incr(thread);
-               }
        } else {
                /*
-                *      Signal if idling on another processor.
+                * Either the thread is idling in place on another processor,
+                * or it hasn't finished context switching yet.
                 */
 #if CONFIG_SCHED_IDLE_IN_PLACE
                if (thread->state & TH_IDLE) {
@@ -675,8 +653,11 @@ thread_unblock(
 #else
                assert((thread->state & TH_IDLE) == 0);
 #endif
-
-               new_run_count = sched_run_count; /* updated in thread_select_idle() */
+               /*
+                * The run count is only dropped after the context switch completes
+                * and the thread is still waiting, so we should not run_incr here
+                */
+               new_run_count = sched_run_buckets[TH_BUCKET_RUN];
        }
 
 
@@ -745,7 +726,8 @@ thread_unblock(
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
-               (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, new_run_count, 0);
+               (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result,
+               sched_run_buckets[TH_BUCKET_RUN], 0);
 
        DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
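The old sched_run/share/background counter trio collapses into sched_run_buckets[], with TH_BUCKET_RUN holding the overall runnable count that the tracepoint now reports. How sched_run_incr updates the buckets is not shown in this hunk; the sketch below only illustrates the counter shape, using C11 atomics and hypothetical names rather than the kernel's primitives:

#include <stdatomic.h>
#include <stdint.h>

/* Assumed bucket layout: the TH_BUCKET_RUN-style total first, per-class counts after. */
enum { BUCKET_RUN, BUCKET_FIXPRI, BUCKET_SHARE_FG, BUCKET_SHARE_UT, BUCKET_SHARE_BG, BUCKET_MAX };

static _Atomic uint32_t run_buckets[BUCKET_MAX];

/* Wakeup path: bump the total and the thread's own class, return the new total. */
static uint32_t run_incr(int bucket)
{
    if (bucket != BUCKET_RUN)
        atomic_fetch_add(&run_buckets[bucket], 1);
    return atomic_fetch_add(&run_buckets[BUCKET_RUN], 1) + 1;
}

/* Block path: the mirror image, taken only once the context switch has completed. */
static uint32_t run_decr(int bucket)
{
    if (bucket != BUCKET_RUN)
        atomic_fetch_sub(&run_buckets[bucket], 1);
    return atomic_fetch_sub(&run_buckets[BUCKET_RUN], 1) - 1;
}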
 
@@ -770,6 +752,8 @@ thread_go(
           thread_t        thread,
           wait_result_t   wresult)
 {
+       assert_thread_magic(thread);
+
        assert(thread->at_safe_point == FALSE);
        assert(thread->wait_event == NO_EVENT64);
        assert(thread->waitq == NULL);
@@ -778,8 +762,13 @@ thread_go(
        assert(thread->state & TH_WAIT);
 
 
-       if (thread_unblock(thread, wresult))
+       if (thread_unblock(thread, wresult)) {
+#if    SCHED_TRACE_THREAD_WAKEUPS
+               backtrace(&thread->thread_wakeup_bt[0],
+                   (sizeof(thread->thread_wakeup_bt)/sizeof(uintptr_t)));
+#endif
                thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
+       }
 
        return (KERN_SUCCESS);
 }
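Under SCHED_TRACE_THREAD_WAKEUPS, thread_go records who woke the thread by capturing a backtrace into a per-thread buffer at wakeup time. A user-space analogue of that pattern, using glibc's execinfo backtrace() rather than the kernel's kern/backtrace.h (toy_thread and the buffer depth are assumptions):

#include <execinfo.h>

#define WAKEUP_BT_DEPTH 8

struct toy_thread {
    void *wakeup_bt[WAKEUP_BT_DEPTH];    /* most recent wakeup call chain */
    int   wakeup_bt_len;
};

/* Call at the point the thread is made runnable, so a later latency or hang
 * report can show which code path issued the wakeup. */
static void record_wakeup_backtrace(struct toy_thread *t)
{
    t->wakeup_bt_len = backtrace(t->wakeup_bt, WAKEUP_BT_DEPTH);
}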
@@ -801,7 +790,6 @@ thread_mark_wait_locked(
 {
        boolean_t               at_safe_point;
 
-       assert(thread == current_thread());
        assert(!(thread->state & (TH_WAIT|TH_IDLE|TH_UNINT|TH_TERMINATE2)));
 
        /*
@@ -905,6 +893,18 @@ assert_wait(
        return waitq_assert_wait64(waitq, CAST_EVENT64_T(event), interruptible, TIMEOUT_WAIT_FOREVER);
 }
 
+/*
+ *     assert_wait_queue:
+ *
+ *     Return the global waitq for the specified event
+ */
+struct waitq *
+assert_wait_queue(
+       event_t                         event)
+{
+       return global_eventq(event);
+}
+
 wait_result_t
 assert_wait_timeout(
        event_t                         event,
@@ -925,7 +925,6 @@ assert_wait_timeout(
 
        s = splsched();
        waitq_lock(waitq);
-       thread_lock(thread);
 
        clock_interval_to_deadline(interval, scale_factor, &deadline);
 
@@ -939,7 +938,6 @@ assert_wait_timeout(
                                             deadline, TIMEOUT_NO_LEEWAY,
                                             thread);
 
-       thread_unlock(thread);
        waitq_unlock(waitq);
        splx(s);
        return wresult;
@@ -976,7 +974,6 @@ assert_wait_timeout_with_leeway(
 
        s = splsched();
        waitq_lock(waitq);
-       thread_lock(thread);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
@@ -987,7 +984,6 @@ assert_wait_timeout_with_leeway(
                                             urgency, deadline, slop,
                                             thread);
 
-       thread_unlock(thread);
        waitq_unlock(waitq);
        splx(s);
        return wresult;
@@ -1011,7 +1007,6 @@ assert_wait_deadline(
 
        s = splsched();
        waitq_lock(waitq);
-       thread_lock(thread);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
@@ -1021,7 +1016,6 @@ assert_wait_deadline(
                                             interruptible,
                                             TIMEOUT_URGENCY_SYS_NORMAL, deadline,
                                             TIMEOUT_NO_LEEWAY, thread);
-       thread_unlock(thread);
        waitq_unlock(waitq);
        splx(s);
        return wresult;
@@ -1047,7 +1041,6 @@ assert_wait_deadline_with_leeway(
 
        s = splsched();
        waitq_lock(waitq);
-       thread_lock(thread);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
@@ -1057,8 +1050,6 @@ assert_wait_deadline_with_leeway(
                                             interruptible,
                                             urgency, deadline, leeway,
                                             thread);
-
-       thread_unlock(thread);
        waitq_unlock(waitq);
        splx(s);
        return wresult;
@@ -1311,21 +1302,19 @@ clear_wait_internal(
        thread_t                thread,
        wait_result_t   wresult)
 {
-       uint32_t        i = LockTimeOut;
+       uint32_t        i = LockTimeOutUsec;
        struct waitq *waitq = thread->waitq;
-
+       
        do {
                if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
                        return (KERN_FAILURE);
 
                if (waitq != NULL) {
-                       assert(waitq_irq_safe(waitq)); //irqs are already disabled!
-                       if (waitq_lock_try(waitq)) {
-                               waitq_pull_thread_locked(waitq, thread);
-                               waitq_unlock(waitq);
-                       } else {
+                       if (!waitq_pull_thread_locked(waitq, thread)) {
                                thread_unlock(thread);
                                delay(1);
+                               if (i > 0 && !machine_timeout_suspended())
+                                       i--;
                                thread_lock(thread);
                                if (waitq != thread->waitq)
                                        return KERN_NOT_WAITING;
@@ -1338,7 +1327,7 @@ clear_wait_internal(
                        return (thread_go(thread, wresult));
                else
                        return (KERN_NOT_WAITING);
-       } while ((--i > 0) || machine_timeout_suspended());
+       } while (i > 0);
 
        panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
                  thread, waitq, cpu_number());
@@ -1383,33 +1372,72 @@ clear_wait(
  */
 kern_return_t
 thread_wakeup_prim(
-       event_t                 event,
-       boolean_t               one_thread,
-       wait_result_t           result)
+                   event_t          event,
+                   boolean_t        one_thread,
+                   wait_result_t    result)
 {
-       return (thread_wakeup_prim_internal(event, one_thread, result, -1));
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
+
+       struct waitq *wq = global_eventq(event);
+
+       if (one_thread)
+               return waitq_wakeup64_one(wq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
+       else
+               return waitq_wakeup64_all(wq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
 }
 
+/*
+ * Wakeup a specified thread if and only if it's waiting for this event
+ */
+kern_return_t
+thread_wakeup_thread(
+                     event_t         event,
+                     thread_t        thread)
+{
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
+
+       struct waitq *wq = global_eventq(event);
+
+       return waitq_wakeup64_thread(wq, CAST_EVENT64_T(event), thread, THREAD_AWAKENED);
+}
 
+/*
+ * Wakeup a thread waiting on an event and promote it to a priority.
+ *
+ * Requires woken thread to un-promote itself when done.
+ */
 kern_return_t
-thread_wakeup_prim_internal(
-       event_t                 event,
-       boolean_t               one_thread,
-       wait_result_t           result,
-       int                     priority)
+thread_wakeup_one_with_pri(
+                           event_t      event,
+                           int          priority)
 {
        if (__improbable(event == NO_EVENT))
                panic("%s() called with NO_EVENT", __func__);
 
-       struct waitq *wq;
+       struct waitq *wq = global_eventq(event);
 
-       wq = global_eventq(event);
-       priority = (priority == -1 ? WAITQ_ALL_PRIORITIES : priority);
+       return waitq_wakeup64_one(wq, CAST_EVENT64_T(event), THREAD_AWAKENED, priority);
+}
 
-       if (one_thread)
-               return waitq_wakeup64_one(wq, CAST_EVENT64_T(event), result, priority);
-       else
-               return waitq_wakeup64_all(wq, CAST_EVENT64_T(event), result, priority);
+/*
+ * Wakeup a thread waiting on an event,
+ * promote it to a priority,
+ * and return a reference to the woken thread.
+ *
+ * Requires woken thread to un-promote itself when done.
+ */
+thread_t
+thread_wakeup_identify(event_t  event,
+                       int      priority)
+{
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
+
+       struct waitq *wq = global_eventq(event);
+
+       return waitq_wakeup64_identify(wq, CAST_EVENT64_T(event), THREAD_AWAKENED, priority);
 }
 
 /*
@@ -1665,9 +1693,7 @@ sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) {
 
        processor_t sprocessor;
 
-       sprocessor = (processor_t)queue_first(&cpset->active_queue);
-
-       while (!queue_end(&cpset->active_queue, (queue_entry_t)sprocessor)) {
+       qe_foreach_element(sprocessor, &cpset->active_queue, processor_queue) {
                if ((sprocessor->state == PROCESSOR_RUNNING) &&
                    (sprocessor->processor_primary != sprocessor) &&
                    (sprocessor->processor_primary->state == PROCESSOR_RUNNING) &&
@@ -1677,7 +1703,6 @@ sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) {
                        ast_processor = sprocessor;
                        break;
                }
-               sprocessor = (processor_t)queue_next((queue_entry_t)sprocessor);
        }
 
 smt_balance_exit:
@@ -1769,9 +1794,7 @@ thread_select(
                         */
                        if (thread->sched_pri >= BASEPRI_RTQUEUES && processor->first_timeslice) {
                                if (rt_runq.count > 0) {
-                                       thread_t next_rt;
-
-                                       next_rt = (thread_t)queue_first(&rt_runq.queue);
+                                       thread_t next_rt = qe_queue_first(&rt_runq.queue, struct thread, runq_links);
 
                                        assert(next_rt->runq == THREAD_ON_RT_RUNQ);
 
@@ -1806,14 +1829,14 @@ thread_select(
 
                /* OK, so we're not going to run the current thread. Look at the RT queue. */
                if (rt_runq.count > 0) {
-                       thread_t next_rt = (thread_t)queue_first(&rt_runq.queue);
+                       thread_t next_rt = qe_queue_first(&rt_runq.queue, struct thread, runq_links);
 
                        assert(next_rt->runq == THREAD_ON_RT_RUNQ);
 
                        if (__probable((next_rt->bound_processor == PROCESSOR_NULL ||
                                       (next_rt->bound_processor == processor)))) {
 pick_new_rt_thread:
-                               new_thread = (thread_t)dequeue_head(&rt_runq.queue);
+                               new_thread = qe_dequeue_head(&rt_runq.queue, struct thread, runq_links);
 
                                new_thread->runq = PROCESSOR_NULL;
                                SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
@@ -1865,14 +1888,12 @@ pick_new_rt_thread:
                 *      was running.
                 */
                if (processor->state == PROCESSOR_RUNNING) {
-                       remqueue((queue_entry_t)processor);
                        processor->state = PROCESSOR_IDLE;
 
                        if (processor->processor_primary == processor) {
-                               enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
-                       }
-                       else {
-                               enqueue_head(&pset->idle_secondary_queue, (queue_entry_t)processor);
+                               re_queue_head(&pset->idle_queue, &processor->processor_queue);
+                       } else {
+                               re_queue_head(&pset->idle_secondary_queue, &processor->processor_queue);
                        }
                }
 
@@ -1933,12 +1954,6 @@ thread_select_idle(
        uint64_t                arg1, arg2;
        int                     urgency;
 
-       if (thread->sched_mode == TH_MODE_TIMESHARE) {
-               if (thread->sched_flags & TH_SFLAG_THROTTLED)
-                       sched_background_decr(thread);
-
-               sched_share_decr(thread);
-       }
        sched_run_decr(thread);
 
        thread->state |= TH_IDLE;
@@ -2011,12 +2026,6 @@ thread_select_idle(
        thread_tell_urgency(urgency, arg1, arg2, 0, new_thread);
 
        sched_run_incr(thread);
-       if (thread->sched_mode == TH_MODE_TIMESHARE) {
-               sched_share_incr(thread);
-
-               if (thread->sched_flags & TH_SFLAG_THROTTLED)
-                       sched_background_incr(thread);
-       }
 
        return (new_thread);
 }
@@ -2063,12 +2072,14 @@ thread_invoke(
        sched_timeshare_consider_maintenance(ctime);
 #endif
 
+       assert_thread_magic(self);
        assert(self == current_thread());
        assert(self->runq == PROCESSOR_NULL);
        assert((self->state & (TH_RUN|TH_TERMINATE2)) == TH_RUN);
 
        thread_lock(thread);
 
+       assert_thread_magic(thread);
        assert((thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_TERMINATE|TH_TERMINATE2)) == TH_RUN);
        assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == current_processor());
        assert(thread->runq == PROCESSOR_NULL);
@@ -2153,6 +2164,10 @@ thread_invoke(
 
                        DTRACE_SCHED(on__cpu);
 
+#if KPERF
+                       kperf_on_cpu(thread, continuation, NULL);
+#endif /* KPERF */
+
                        thread_dispatch(self, thread);
 
                        thread->continuation = thread->parameter = NULL;
@@ -2172,6 +2187,10 @@ thread_invoke(
 
                        thread_unlock(self);
 
+#if KPERF
+                       kperf_on_cpu(thread, continuation, NULL);
+#endif /* KPERF */
+
                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
                                self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
@@ -2281,6 +2300,10 @@ need_stack:
 
        DTRACE_SCHED(on__cpu);
 
+#if KPERF
+       kperf_on_cpu(self, NULL, __builtin_frame_address(0));
+#endif /* KPERF */
+
        /*
         * We have been resumed and are set to run.
         */
@@ -2318,7 +2341,7 @@ pset_cancel_deferred_dispatch(
        uint32_t                sampled_sched_run_count;
 
        pset_lock(pset);
-       sampled_sched_run_count = (volatile uint32_t) sched_run_count;
+       sampled_sched_run_count = (volatile uint32_t) sched_run_buckets[TH_BUCKET_RUN];
 
        /*
         * If we have emptied the run queue, and our current thread is runnable, we
@@ -2375,7 +2398,7 @@ pset_cancel_deferred_dispatch(
                                 * The tail?  At the (relative) old position in the
                                 * queue?  Or something else entirely?
                                 */
-                               re_queue_head(&pset->idle_queue, (queue_entry_t)active_processor);
+                               re_queue_head(&pset->idle_queue, &active_processor->processor_queue);
 
                                assert(active_processor->next_thread == THREAD_NULL);
 
@@ -2431,8 +2454,9 @@ thread_dispatch(
 
                if (thread->state & TH_IDLE) {
                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
-                               (uintptr_t)thread_tid(thread), 0, thread->state, sched_run_count, 0);
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
+                               (uintptr_t)thread_tid(thread), 0, thread->state,
+                               sched_run_buckets[TH_BUCKET_RUN], 0);
                } else {
                        int64_t consumed;
                        int64_t remainder = 0;
@@ -2467,9 +2491,24 @@ thread_dispatch(
                        thread_lock(thread);
 
                        /*
-                        *      Compute remainder of current quantum.
+                        * Apply a priority floor if the thread holds a kernel resource
+                        * Do this before checking starting_pri to avoid overpenalizing
+                        * repeated rwlock blockers.
+                        */
+                       if (__improbable(thread->rwlock_count != 0))
+                               lck_rw_set_promotion_locked(thread);
+
+                       boolean_t keep_quantum = processor->first_timeslice;
+
+                       /*
+                        * Treat a thread which has dropped priority since it got on core
+                        * as having expired its quantum.
                         */
-                       if (processor->first_timeslice &&
+                       if (processor->starting_pri > thread->sched_pri)
+                               keep_quantum = FALSE;
+
+                       /* Compute remainder of current quantum. */
+                       if (keep_quantum &&
                            processor->quantum_end > processor->last_dispatch)
                                thread->quantum_remaining = (uint32_t)remainder;
                        else
@@ -2523,28 +2562,6 @@ thread_dispatch(
 
                        thread->computation_metered += (processor->last_dispatch - thread->computation_epoch);
 
-                       if ((thread->rwlock_count != 0) && !(LcksOpts & disLkRWPrio)) {
-                               integer_t priority;
-
-                               priority = thread->sched_pri;
-
-                               if (priority < thread->base_pri)
-                                       priority = thread->base_pri;
-                               if (priority < BASEPRI_BACKGROUND)
-                                       priority = BASEPRI_BACKGROUND;
-
-                               if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
-                                       KERNEL_DEBUG_CONSTANT(
-                                               MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
-                                               (uintptr_t)thread_tid(thread), thread->sched_pri, thread->base_pri, priority, 0);
-
-                                       thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
-
-                                       if (thread->sched_pri < priority)
-                                               set_sched_pri(thread, priority);
-                               }
-                       }
-
                        if (!(thread->state & TH_WAIT)) {
                                /*
                                 *      Still runnable.
@@ -2561,8 +2578,9 @@ thread_dispatch(
                                        thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
 
                                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
-                                       (uintptr_t)thread_tid(thread), thread->reason, thread->state, sched_run_count, 0);
+                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
+                                       (uintptr_t)thread_tid(thread), thread->reason, thread->state,
+                                       sched_run_buckets[TH_BUCKET_RUN], 0);
 
                                if (thread->wake_active) {
                                        thread->wake_active = FALSE;
@@ -2594,12 +2612,6 @@ thread_dispatch(
                                thread->last_made_runnable_time = ~0ULL;
                                thread->chosen_processor = PROCESSOR_NULL;
 
-                               if (thread->sched_mode == TH_MODE_TIMESHARE) {
-                                       if (thread->sched_flags & TH_SFLAG_THROTTLED)
-                                               sched_background_decr(thread);
-
-                                       sched_share_decr(thread);
-                               }
                                new_run_count = sched_run_decr(thread);
 
 #if CONFIG_SCHED_SFI
@@ -2613,8 +2625,9 @@ thread_dispatch(
                                machine_thread_going_off_core(thread, should_terminate);
 
                                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
-                                       (uintptr_t)thread_tid(thread), thread->reason, thread->state, new_run_count, 0);
+                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
+                                       (uintptr_t)thread_tid(thread), thread->reason, thread->state,
+                                       new_run_count, 0);
 
                                (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
 
@@ -2652,7 +2665,7 @@ thread_dispatch(
                }
 #endif
 
-               assert(processor->last_dispatch >= self->last_made_runnable_time);
+               assertf(processor->last_dispatch >= self->last_made_runnable_time, "Non-monotonic time? dispatch at 0x%llx, runnable at 0x%llx", processor->last_dispatch, self->last_made_runnable_time);
                latency = processor->last_dispatch - self->last_made_runnable_time;
 
                urgency = thread_get_urgency(self, &arg1, &arg2);
@@ -2685,6 +2698,7 @@ thread_dispatch(
 
        self->computation_epoch = processor->last_dispatch;
        self->reason = AST_NONE;
+       processor->starting_pri = self->sched_pri;
 
        thread_unlock(self);
 
@@ -2693,7 +2707,7 @@ thread_dispatch(
         * TODO: Can we state that redispatching our old thread is also
         * uninteresting?
         */
-       if ((((volatile uint32_t)sched_run_count) == 1) &&
+       if ((((volatile uint32_t)sched_run_buckets[TH_BUCKET_RUN]) == 1) &&
            !(self->state & TH_IDLE)) {
                pset_cancel_deferred_dispatch(processor->processor_set, processor);
        }
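Two behavioral changes in thread_dispatch above: a thread still holding an rwlock gets its priority floor applied up front via lck_rw_set_promotion_locked(), and a thread whose sched_pri dropped below the processor->starting_pri it was dispatched with is treated as if its quantum expired. The quantum rule, restated as a standalone helper (toy types and names; the "fresh quantum" fallback is assumed from context):

#include <stdbool.h>
#include <stdint.h>

struct toy_cpu    { bool first_timeslice; int starting_pri; uint64_t quantum_end, last_dispatch; };
struct toy_thread { int sched_pri; };

/* Decide how much quantum a thread keeps as it comes off core. */
static uint32_t quantum_to_keep(const struct toy_cpu *cpu, const struct toy_thread *th,
                                uint32_t remainder, uint32_t fresh_quantum)
{
    bool keep = cpu->first_timeslice;

    /* Priority fell while on core: treat the quantum as already expired. */
    if (cpu->starting_pri > th->sched_pri)
        keep = false;

    if (keep && cpu->quantum_end > cpu->last_dispatch)
        return remainder;                /* finish the current quantum later */
    return fresh_quantum;                /* otherwise start over with a full quantum */
}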
@@ -2839,6 +2853,10 @@ thread_continue(
        continuation = self->continuation;
        parameter = self->parameter;
 
+#if KPERF
+       kperf_on_cpu(self, continuation, NULL);
+#endif
+
        thread_dispatch(thread, self);
 
        self->continuation = self->parameter = NULL;
@@ -2864,10 +2882,10 @@ thread_quantum_init(thread_t thread)
 uint32_t
 sched_timeshare_initial_quantum_size(thread_t thread)
 {
-       if ((thread == THREAD_NULL) || !(thread->sched_flags & TH_SFLAG_THROTTLED))
-               return std_quantum;
-       else
+       if ((thread != THREAD_NULL) && thread->th_sched_bucket == TH_BUCKET_SHARE_BG)
                return bg_quantum;
+       else
+               return std_quantum;
 }
 
 /*
@@ -2879,14 +2897,11 @@ void
 run_queue_init(
        run_queue_t             rq)
 {
-       int                             i;
-
-       rq->highq = IDLEPRI;
-       for (i = 0; i < NRQBM; i++)
+       rq->highq = NOPRI;
+       for (u_int i = 0; i < BITMAP_LEN(NRQS); i++)
                rq->bitmap[i] = 0;
-       setbit(MAXPRI - IDLEPRI, rq->bitmap);
        rq->urgency = rq->count = 0;
-       for (i = 0; i < NRQS; i++)
+       for (int i = 0; i < NRQS; i++)
                queue_init(&rq->queues[i]);
 }
 
@@ -2901,19 +2916,21 @@ run_queue_init(
  */
 thread_t
 run_queue_dequeue(
-       run_queue_t             rq,
-       integer_t               options)
+                  run_queue_t   rq,
+                  integer_t     options)
 {
-       thread_t                thread;
-       queue_t                 queue = rq->queues + rq->highq;
+       thread_t    thread;
+       queue_t     queue = &rq->queues[rq->highq];
 
        if (options & SCHED_HEADQ) {
-               thread = (thread_t)dequeue_head(queue);
-       }
-       else {
-               thread = (thread_t)dequeue_tail(queue);
+               thread = qe_dequeue_head(queue, struct thread, runq_links);
+       } else {
+               thread = qe_dequeue_tail(queue, struct thread, runq_links);
        }
 
+       assert(thread != THREAD_NULL);
+       assert_thread_magic(thread);
+
        thread->runq = PROCESSOR_NULL;
        SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
        rq->count--;
@@ -2921,12 +2938,11 @@ run_queue_dequeue(
                rq->urgency--; assert(rq->urgency >= 0);
        }
        if (queue_empty(queue)) {
-               if (rq->highq != IDLEPRI)
-                       clrbit(MAXPRI - rq->highq, rq->bitmap);
-               rq->highq = MAXPRI - ffsbit(rq->bitmap);
+               bitmap_clear(rq->bitmap, rq->highq);
+               rq->highq = bitmap_first(rq->bitmap, NRQS);
        }
 
-       return (thread);
+       return thread;
 }
 
 /*
@@ -2939,34 +2955,35 @@ run_queue_dequeue(
  */
 boolean_t
 run_queue_enqueue(
-                                                         run_queue_t           rq,
-                                                         thread_t                      thread,
-                                                         integer_t             options)
+                  run_queue_t   rq,
+                  thread_t      thread,
+                  integer_t     options)
 {
-       queue_t                 queue = rq->queues + thread->sched_pri;
-       boolean_t               result = FALSE;
-       
+       queue_t     queue = &rq->queues[thread->sched_pri];
+       boolean_t   result = FALSE;
+
+       assert_thread_magic(thread);
+
        if (queue_empty(queue)) {
-               enqueue_tail(queue, (queue_entry_t)thread);
-               
-               setbit(MAXPRI - thread->sched_pri, rq->bitmap);
+               enqueue_tail(queue, &thread->runq_links);
+
+               rq_bitmap_set(rq->bitmap, thread->sched_pri);
                if (thread->sched_pri > rq->highq) {
                        rq->highq = thread->sched_pri;
                        result = TRUE;
                }
        } else {
                if (options & SCHED_TAILQ)
-                       enqueue_tail(queue, (queue_entry_t)thread);
+                       enqueue_tail(queue, &thread->runq_links);
                else
-                       enqueue_head(queue, (queue_entry_t)thread);
+                       enqueue_head(queue, &thread->runq_links);
        }
        if (SCHED(priority_is_urgent)(thread->sched_pri))
                rq->urgency++;
        SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
        rq->count++;
-       
+
        return (result);
-       
 }
 
 /*
@@ -2978,24 +2995,25 @@ run_queue_enqueue(
  */
 void
 run_queue_remove(
-                                 run_queue_t           rq,
-                                 thread_t                      thread)
+                 run_queue_t    rq,
+                 thread_t       thread)
 {
+       assert(thread->runq != PROCESSOR_NULL);
+       assert_thread_magic(thread);
 
-       remqueue((queue_entry_t)thread);
+       remqueue(&thread->runq_links);
        SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
        rq->count--;
        if (SCHED(priority_is_urgent)(thread->sched_pri)) {
                rq->urgency--; assert(rq->urgency >= 0);
        }
-       
-       if (queue_empty(rq->queues + thread->sched_pri)) {
+
+       if (queue_empty(&rq->queues[thread->sched_pri])) {
                /* update run queue status */
-               if (thread->sched_pri != IDLEPRI)
-                       clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
-               rq->highq = MAXPRI - ffsbit(rq->bitmap);
+               bitmap_clear(rq->bitmap, thread->sched_pri);
+               rq->highq = bitmap_first(rq->bitmap, NRQS);
        }
-       
+
        thread->runq = PROCESSOR_NULL;
 }
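run_queue_init/dequeue/enqueue/remove all switch from the MAXPRI-relative setbit/clrbit/ffsbit trio to bitmap_set/bitmap_clear/bitmap_first, and highq now starts at NOPRI rather than being pinned to a permanently set IDLEPRI bit. The highq maintenance pattern, sketched standalone (toy types, 128 priorities assumed, gcc/clang __builtin_clzll in place of the real bitmap_first):

#include <stdint.h>

#define NPRI 128                          /* NRQS in the diff */
#define BMAP_LEN(n) (((n) + 63) / 64)

struct toy_runq {
    int      highq;                       /* highest non-empty priority, or -1 (NOPRI) */
    uint64_t bitmap[BMAP_LEN(NPRI)];
};

/* Highest set bit across the map, -1 if empty: the role of bitmap_first(). */
static int bmap_first(const uint64_t *map)
{
    for (int w = BMAP_LEN(NPRI) - 1; w >= 0; w--)
        if (map[w] != 0)
            return w * 64 + 63 - __builtin_clzll(map[w]);
    return -1;
}

static void toy_enqueue(struct toy_runq *rq, int pri)
{
    rq->bitmap[pri / 64] |= 1ULL << (pri % 64);
    if (pri > rq->highq)
        rq->highq = pri;
}

/* After the last thread at a priority leaves, drop its bit and recompute highq. */
static void toy_level_emptied(struct toy_runq *rq, int pri)
{
    rq->bitmap[pri / 64] &= ~(1ULL << (pri % 64));
    rq->highq = bmap_first(rq->bitmap);
}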
 
@@ -3009,7 +3027,7 @@ rt_runq_scan(sched_update_scan_context_t scan_context)
        s = splsched();
        rt_lock_lock();
 
-       qe_foreach_element_safe(thread, &rt_runq.queue, links) {
+       qe_foreach_element_safe(thread, &rt_runq.queue, runq_links) {
                if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
                        scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
                }
@@ -3026,36 +3044,34 @@ rt_runq_scan(sched_update_scan_context_t scan_context)
  *     Enqueue a thread for realtime execution.
  */
 static boolean_t
-realtime_queue_insert(
-       thread_t                        thread)
+realtime_queue_insert(thread_t thread)
 {
-       queue_t                         queue = &rt_runq.queue;
-       uint64_t                        deadline = thread->realtime.deadline;
-       boolean_t                       preempt = FALSE;
+       queue_t     queue       = &rt_runq.queue;
+       uint64_t    deadline    = thread->realtime.deadline;
+       boolean_t   preempt     = FALSE;
 
        rt_lock_lock();
 
        if (queue_empty(queue)) {
-               enqueue_tail(queue, (queue_entry_t)thread);
+               enqueue_tail(queue, &thread->runq_links);
                preempt = TRUE;
-       }
-       else {
-               register thread_t       entry = (thread_t)queue_first(queue);
-
-               while (TRUE) {
-                       if (    queue_end(queue, (queue_entry_t)entry)  ||
-                                               deadline < entry->realtime.deadline             ) {
-                               entry = (thread_t)queue_prev((queue_entry_t)entry);
+       } else {
+               /* Insert into rt_runq in thread deadline order */
+               queue_entry_t iter;
+               qe_foreach(iter, queue) {
+                       thread_t iter_thread = qe_element(iter, struct thread, runq_links);
+                       assert_thread_magic(iter_thread);
+
+                       if (deadline < iter_thread->realtime.deadline) {
+                               if (iter == queue_first(queue))
+                                       preempt = TRUE;
+                               insque(&thread->runq_links, queue_prev(iter));
+                               break;
+                       } else if (iter == queue_last(queue)) {
+                               enqueue_tail(queue, &thread->runq_links);
                                break;
                        }
-
-                       entry = (thread_t)queue_next((queue_entry_t)entry);
                }
-
-               if ((queue_entry_t)entry == queue)
-                       preempt = TRUE;
-
-               insque((queue_entry_t)thread, (queue_entry_t)entry);
        }
 
        thread->runq = THREAD_ON_RT_RUNQ;
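realtime_queue_insert now walks the queue with qe_foreach and splices the thread in before the first entry with a later deadline, flagging preemption only when the new thread lands at the head. The same ordering logic on a plain singly linked list (illustrative only; rt_node and rt_insert are hypothetical):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct rt_node { uint64_t deadline; struct rt_node *next; };

/* Insert in ascending deadline order (equal deadlines go after existing entries);
 * return true if the new node became the head, i.e. it should trigger preemption. */
static bool rt_insert(struct rt_node **head, struct rt_node *node)
{
    struct rt_node **link = head;

    while (*link != NULL && (*link)->deadline <= node->deadline)
        link = &(*link)->next;

    node->next = *link;
    *link = node;
    return link == head;
}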
@@ -3095,8 +3111,7 @@ realtime_setrun(
         */
        if ( (thread->bound_processor == processor)
                && processor->state == PROCESSOR_IDLE) {
-               remqueue((queue_entry_t)processor);
-               enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
+               re_queue_tail(&pset->active_queue, &processor->processor_queue);
 
                processor->next_thread = thread;
                processor->current_pri = thread->sched_pri;
@@ -3131,8 +3146,8 @@ realtime_setrun(
 
        if (preempt != AST_NONE) {
                if (processor->state == PROCESSOR_IDLE) {
-                       remqueue((queue_entry_t)processor);
-                       enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
+                       re_queue_tail(&pset->active_queue, &processor->processor_queue);
+
                        processor->next_thread = THREAD_NULL;
                        processor->current_pri = thread->sched_pri;
                        processor->current_thmode = thread->sched_mode;
@@ -3185,7 +3200,7 @@ realtime_setrun(
 boolean_t
 priority_is_urgent(int priority)
 {
-       return testbit(priority, sched_preempt_pri) ? TRUE : FALSE;
+       return bitmap_test(sched_preempt_pri, priority) ? TRUE : FALSE;
 }
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
@@ -3220,8 +3235,8 @@ processor_setrun(
        if ( (SCHED(direct_dispatch_to_idle_processors) ||
                  thread->bound_processor == processor)
                && processor->state == PROCESSOR_IDLE) {
-               remqueue((queue_entry_t)processor);
-               enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
+
+               re_queue_tail(&pset->active_queue, &processor->processor_queue);
 
                processor->next_thread = thread;
                processor->current_pri = thread->sched_pri;
@@ -3268,8 +3283,8 @@ processor_setrun(
 
        if (preempt != AST_NONE) {
                if (processor->state == PROCESSOR_IDLE) {
-                       remqueue((queue_entry_t)processor);
-                       enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
+                       re_queue_tail(&pset->active_queue, &processor->processor_queue);
+
                        processor->next_thread = THREAD_NULL;
                        processor->current_pri = thread->sched_pri;
                        processor->current_thmode = thread->sched_mode;
@@ -3300,8 +3315,8 @@ processor_setrun(
                        ipi_action = eInterruptRunning;
                } else if (     processor->state == PROCESSOR_IDLE      &&
                                        processor != current_processor()        ) {
-                       remqueue((queue_entry_t)processor);
-                       enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
+                       re_queue_tail(&pset->active_queue, &processor->processor_queue);
+
                        processor->next_thread = THREAD_NULL;
                        processor->current_pri = thread->sched_pri;
                        processor->current_thmode = thread->sched_mode;
@@ -3417,7 +3432,9 @@ choose_processor(
        thread_t                        thread)
 {
        processor_set_t         nset, cset = pset;
-       
+
+       assert(thread->sched_pri <= BASEPRI_RTQUEUES);
+
        /*
         * Prefer the hinted processor, when appropriate.
         */
@@ -3468,7 +3485,6 @@ choose_processor(
                                         * the "least cost idle" processor above.
                                         */
                                        return (processor);
-                                       break;
                                case PROCESSOR_RUNNING:
                                case PROCESSOR_DISPATCHING:
                                        /*
@@ -3603,12 +3619,12 @@ choose_processor(
 
                        if (thread->sched_pri > lowest_unpaired_primary_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
+                               re_queue_tail(&cset->active_queue, &lp_unpaired_primary_processor->processor_queue);
                                return lp_unpaired_primary_processor;
                        }
                        if (thread->sched_pri > lowest_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
+                               re_queue_tail(&cset->active_queue, &lp_processor->processor_queue);
                                return lp_processor;
                        }
                        if (thread->realtime.deadline < furthest_deadline)
@@ -3624,12 +3640,12 @@ choose_processor(
 
                        if (thread->sched_pri > lowest_unpaired_primary_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
+                               re_queue_tail(&cset->active_queue, &lp_unpaired_primary_processor->processor_queue);
                                return lp_unpaired_primary_processor;
                        }
                        if (thread->sched_pri > lowest_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
+                               re_queue_tail(&cset->active_queue, &lp_processor->processor_queue);
                                return lp_processor;
                        }
 
@@ -4102,7 +4118,7 @@ thread_run_queue_remove(
 
                assert(thread->runq == THREAD_ON_RT_RUNQ);
 
-               remqueue((queue_entry_t)thread);
+               remqueue(&thread->runq_links);
                SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
                rt_runq.count--;
 
@@ -4159,7 +4175,6 @@ thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2)
                   ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->base_pri <= MAXPRI_THROTTLE)))  {
                /*
                 * Background urgency applied when thread priority is MAXPRI_THROTTLE or lower and thread is not promoted
-                * TODO: Use TH_SFLAG_THROTTLED instead?
                 */
                *arg1 = thread->sched_pri;
                *arg2 = thread->base_pri;
@@ -4169,9 +4184,9 @@ thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2)
                /* For otherwise unclassified threads, report throughput QoS
                 * parameters
                 */
-               *arg1 = thread->effective_policy.t_through_qos;
-               *arg2 = thread->task->effective_policy.t_through_qos;
-               
+               *arg1 = proc_get_effective_thread_policy(thread, TASK_POLICY_THROUGH_QOS);
+               *arg2 = proc_get_effective_task_policy(thread->task, TASK_POLICY_THROUGH_QOS);
+
                return (THREAD_URGENCY_NORMAL);
        }
 }
@@ -4303,22 +4318,19 @@ processor_idle(
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 
                        (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
-                       
+
                return (new_thread);
-       }
-       else
-       if (state == PROCESSOR_IDLE) {
-               remqueue((queue_entry_t)processor);
+
+       } else if (state == PROCESSOR_IDLE) {
+               re_queue_tail(&pset->active_queue, &processor->processor_queue);
 
                processor->state = PROCESSOR_RUNNING;
                processor->current_pri = IDLEPRI;
                processor->current_thmode = TH_MODE_FIXED;
                processor->current_sfi_class = SFI_CLASS_KERNEL;
                processor->deadline = UINT64_MAX;
-               enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
-       }
-       else
-       if (state == PROCESSOR_SHUTDOWN) {
+
+       } else if (state == PROCESSOR_SHUTDOWN) {
                /*
                 *      Going off-line.  Force a
                 *      reschedule.
@@ -4424,6 +4436,8 @@ sched_startup(void)
 
        thread_deallocate(thread);
 
+       assert_thread_magic(thread);
+
        /*
         * Yield to the sched_init_thread once, to
         * initialize our own thread after being switched
@@ -4438,9 +4452,6 @@ sched_startup(void)
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 
 static volatile uint64_t               sched_maintenance_deadline;
-#if defined(CONFIG_TELEMETRY)
-static volatile uint64_t               sched_telemetry_deadline = 0;
-#endif
 static uint64_t                                sched_tick_last_abstime;
 static uint64_t                                sched_tick_delta;
 uint64_t                               sched_tick_max_delta;
@@ -4489,17 +4500,13 @@ sched_timeshare_maintenance_continue(void)
        }
 
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_START,
-                                                 sched_tick_delta,
-                                                 late_time,
-                                                 0,
-                                                 0,
-                                                 0);
+               sched_tick_delta, late_time, 0, 0, 0);
 
        /* Add a number of pseudo-ticks corresponding to the elapsed interval
         * This could be greater than 1 if substantial intervals where
         * all processors are idle occur, which rarely occurs in practice.
         */
-       
+
        sched_tick += sched_tick_delta;
 
        /*
@@ -4509,7 +4516,8 @@ sched_timeshare_maintenance_continue(void)
 
        /*
         *  Scan the run queues for threads which
-        *  may need to be updated.
+        *  may need to be updated, and find the earliest runnable thread on the runqueue
+        *  to report its latency.
         */
        SCHED(thread_update_scan)(&scan_context);
 
@@ -4517,9 +4525,16 @@ sched_timeshare_maintenance_continue(void)
 
        uint64_t ctime = mach_absolute_time();
 
-       machine_max_runnable_latency(ctime > scan_context.earliest_bg_make_runnable_time ? ctime - scan_context.earliest_bg_make_runnable_time : 0,
-                                                                ctime > scan_context.earliest_normal_make_runnable_time ? ctime - scan_context.earliest_normal_make_runnable_time : 0,
-                                                                ctime > scan_context.earliest_rt_make_runnable_time ? ctime - scan_context.earliest_rt_make_runnable_time : 0);
+       uint64_t bg_max_latency       = (ctime > scan_context.earliest_bg_make_runnable_time) ?
+                                        ctime - scan_context.earliest_bg_make_runnable_time : 0;
+
+       uint64_t default_max_latency  = (ctime > scan_context.earliest_normal_make_runnable_time) ?
+                                        ctime - scan_context.earliest_normal_make_runnable_time : 0;
+
+       uint64_t realtime_max_latency = (ctime > scan_context.earliest_rt_make_runnable_time) ?
+                                        ctime - scan_context.earliest_rt_make_runnable_time : 0;
+
+       machine_max_runnable_latency(bg_max_latency, default_max_latency, realtime_max_latency);
 
        /*
         * Check to see if the special sched VM group needs attention.
@@ -4527,12 +4542,9 @@ sched_timeshare_maintenance_continue(void)
        sched_vm_group_maintenance();
 
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_END,
-                                                 sched_pri_shift,
-                                                 sched_background_pri_shift,
-                                                 0,
-                                                 0,
-                                                 0);
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE) | DBG_FUNC_END,
+               sched_pri_shifts[TH_BUCKET_SHARE_FG], sched_pri_shifts[TH_BUCKET_SHARE_BG],
+               sched_pri_shifts[TH_BUCKET_SHARE_UT], 0, 0);
 
        assert_wait((event_t)sched_timeshare_maintenance_continue, THREAD_UNINT);
        thread_block((thread_continue_t)sched_timeshare_maintenance_continue);
@@ -4567,26 +4579,6 @@ sched_timeshare_consider_maintenance(uint64_t ctime) {
                        sched_maintenance_wakeups++;
                }
        }
-
-#if defined(CONFIG_TELEMETRY)
-       /*
-        * Windowed telemetry is driven by the scheduler.  It should be safe
-        * to call compute_telemetry_windowed() even when windowed telemetry
-        * is disabled, but we should try to avoid doing extra work for no
-        * reason.
-        */
-       if (telemetry_window_enabled) {
-               deadline = sched_telemetry_deadline;
-
-               if (__improbable(ctime >= deadline)) {
-                       ndeadline = ctime + sched_telemetry_interval;
-
-                       if (__probable(__sync_bool_compare_and_swap(&sched_telemetry_deadline, deadline, ndeadline))) {
-                               compute_telemetry_windowed();
-                       }
-               }
-       }
-#endif /* CONFIG_TELEMETRY */
 }
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
@@ -4598,6 +4590,8 @@ sched_init_thread(void (*continuation)(void))
 
        thread_t thread = current_thread();
 
+       thread_set_thread_name(thread, "sched_maintenance_thread");
+
        sched_maintenance_thread = thread;
 
        continuation();
@@ -4625,8 +4619,8 @@ sched_init_thread(void (*continuation)(void))
 
 #define        THREAD_UPDATE_SIZE              128
 
-static thread_t                thread_update_array[THREAD_UPDATE_SIZE];
-static int                     thread_update_count = 0;
+static thread_t thread_update_array[THREAD_UPDATE_SIZE];
+static uint32_t thread_update_count = 0;
 
 /* Returns TRUE if thread was added, FALSE if thread_update_array is full */
 boolean_t
@@ -4643,14 +4637,16 @@ thread_update_add_thread(thread_t thread)
 void
 thread_update_process_threads(void)
 {
-       while (thread_update_count > 0) {
-               spl_t   s;
-               thread_t thread = thread_update_array[--thread_update_count];
-               thread_update_array[thread_update_count] = THREAD_NULL;
+       assert(thread_update_count <= THREAD_UPDATE_SIZE);
 
-               s = splsched();
+       for (uint32_t i = 0 ; i < thread_update_count ; i++) {
+               thread_t thread = thread_update_array[i];
+               assert_thread_magic(thread);
+               thread_update_array[i] = THREAD_NULL;
+
+               spl_t s = splsched();
                thread_lock(thread);
-               if (!(thread->state & (TH_WAIT)) && (SCHED(can_update_priority)(thread))) {
+               if (!(thread->state & (TH_WAIT)) && thread->sched_stamp != sched_tick) {
                        SCHED(update_priority)(thread);
                }
                thread_unlock(thread);
@@ -4658,6 +4654,8 @@ thread_update_process_threads(void)
 
                thread_deallocate(thread);
        }
+
+       thread_update_count = 0;
 }
 
 /*
@@ -4667,41 +4665,48 @@ thread_update_process_threads(void)
  */
 boolean_t
 runq_scan(
-       run_queue_t                             runq,
-       sched_update_scan_context_t     scan_context)
+          run_queue_t                   runq,
+          sched_update_scan_context_t   scan_context)
 {
-       register int                    count;
-       register queue_t                q;
-       register thread_t               thread;
-
-       if ((count = runq->count) > 0) {
-           q = runq->queues + runq->highq;
-               while (count > 0) {
-                       queue_iterate(q, thread, thread_t, links) {
-                               if (            thread->sched_stamp != sched_tick               &&
-                                               (thread->sched_mode == TH_MODE_TIMESHARE)       ) {
-                                       if (thread_update_add_thread(thread) == FALSE)
-                                               return (TRUE);
-                               }
+       int count       = runq->count;
+       int queue_index;
 
-                               if (cpu_throttle_enabled && ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->base_pri <= MAXPRI_THROTTLE))) {
-                                       if (thread->last_made_runnable_time < scan_context->earliest_bg_make_runnable_time) {
-                                               scan_context->earliest_bg_make_runnable_time = thread->last_made_runnable_time;
-                                       }
-                               } else {
-                                       if (thread->last_made_runnable_time < scan_context->earliest_normal_make_runnable_time) {
-                                               scan_context->earliest_normal_make_runnable_time = thread->last_made_runnable_time;
-                                       }
-                               }
+       assert(count >= 0);
+
+       if (count == 0)
+               return FALSE;
+
+       for (queue_index = bitmap_first(runq->bitmap, NRQS);
+            queue_index >= 0;
+            queue_index = bitmap_next(runq->bitmap, queue_index)) {
+
+               thread_t thread;
+               queue_t  queue = &runq->queues[queue_index];
 
-                               count--;
+               qe_foreach_element(thread, queue, runq_links) {
+                       assert(count > 0);
+                       assert_thread_magic(thread);
+
+                       if (thread->sched_stamp != sched_tick &&
+                           thread->sched_mode == TH_MODE_TIMESHARE) {
+                               if (thread_update_add_thread(thread) == FALSE)
+                                       return TRUE;
                        }
 
-                       q--;
+                       if (cpu_throttle_enabled && ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->base_pri <= MAXPRI_THROTTLE))) {
+                               if (thread->last_made_runnable_time < scan_context->earliest_bg_make_runnable_time) {
+                                       scan_context->earliest_bg_make_runnable_time = thread->last_made_runnable_time;
+                               }
+                       } else {
+                               if (thread->last_made_runnable_time < scan_context->earliest_normal_make_runnable_time) {
+                                       scan_context->earliest_normal_make_runnable_time = thread->last_made_runnable_time;
+                               }
+                       }
+                       count--;
                }
        }
 
-       return (FALSE);
+       return FALSE;
 }
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
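runq_scan now visits only the non-empty priority levels by walking the run queue bitmap with bitmap_first/bitmap_next, instead of decrementing a queue pointer downward from highq while counting off threads. A runnable standalone sketch of that set-bit walk, from highest priority down (toy bitmap and next_level helper; the real bitmap_next is word-at-a-time rather than this linear probe):

#include <stdint.h>
#include <stdio.h>

#define NPRI 128
#define BMAP_LEN(n) (((n) + 63) / 64)

/* Highest set bit strictly below 'prev' (or below NPRI when prev < 0); -1 when none. */
static int next_level(const uint64_t *map, int prev)
{
    int start = (prev < 0) ? NPRI - 1 : prev - 1;
    for (int i = start; i >= 0; i--)
        if (map[i / 64] & (1ULL << (i % 64)))
            return i;
    return -1;
}

int main(void)
{
    uint64_t map[BMAP_LEN(NPRI)] = { 0 };
    map[0] |= 1ULL << 31;                 /* pretend priorities 31 and 80 are populated */
    map[1] |= 1ULL << (80 - 64);

    for (int pri = next_level(map, -1); pri >= 0; pri = next_level(map, pri))
        printf("scan priority level %d\n", pri);   /* visits 80, then 31 */
    return 0;
}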