diff --git a/osfmk/kern/sched_prim.h b/osfmk/kern/sched_prim.h
index cd9e8e8d41f6201a63fb377fb5f8e0bca324bc2c..c6361a9cac9ecd087b9506ddfb807743ada2ff00 100644 (file)
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  */
 #include <kern/clock.h>
 #include <kern/kern_types.h>
 #include <kern/thread.h>
-#include <kern/lock.h>
-#include <kern/time_out.h>     /*** ??? temp - remove me soon ***/
-#include <kern/cpu_data.h>
-#include <kern/wait_queue.h>
+#include <sys/cdefs.h>
+#include <kern/block_hint.h>
 
-#ifdef MACH_KERNEL_PRIVATE
+#ifdef MACH_KERNEL_PRIVATE
 
-#include <mach_ldebug.h>
-/*
- *     Exported interface to sched_prim.c.
- *     A few of these functions are actually defined in
- *     ipc_sched.c, for historical reasons.
- */
+#include <mach/branch_predicates.h>
 
-/* Initialize scheduler module */
+/* Initialization */
 extern void            sched_init(void);
 
-/*
- * Set up thread timeout element(s) when thread is created.
- */
-extern void            thread_timer_setup(
-                                       thread_t                thread);
+extern void            sched_startup(void);
 
-extern void            thread_timer_terminate(void);
+extern void            sched_timebase_init(void);
 
-#define thread_bind_locked(thread, processor)  \
-               (thread)->bound_processor = (processor)
+extern void            pset_rt_init(processor_set_t pset);
 
-/*
- * Prevent a thread from restarting after it blocks interruptibly
- */
+extern void            sched_rtglobal_init(processor_set_t pset);
+
+extern rt_queue_t      sched_rtglobal_runq(processor_set_t pset);
+
+extern void            sched_rtglobal_queue_shutdown(processor_t processor);
+
+extern int64_t         sched_rtglobal_runq_count_sum(void);
+
+extern void            sched_check_spill(processor_set_t pset, thread_t thread);
+
+extern bool             sched_thread_should_yield(processor_t processor, thread_t thread);
+
+/* Force a preemption point for a thread and wait for it to stop running */
 extern boolean_t       thread_stop( 
-                                               thread_t        thread);
+                                               thread_t        thread,
+                                               boolean_t       until_not_runnable);
 
-/*
- * wait for a thread to stop
- */
-extern boolean_t       thread_wait(
+/* Release a previous stop request */
+extern void                    thread_unstop(
                                                thread_t        thread);
 
-/* Select a thread to run on a particular processor */
-extern thread_t        thread_select(
-                                               processor_t     myprocessor);
+/* Wait for a thread to stop running */
+extern void                    thread_wait(
+                                               thread_t        thread,
+                                               boolean_t       until_not_runnable);
 
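/*
 * Illustrative sketch (editorial, not part of the header): the expected
 * pairing of the stop/unstop primitives above. Locking and error handling
 * are elided; the helper name is hypothetical.
 */
static void
example_inspect_stopped_thread(thread_t thread)
{
	/* force a preemption point and wait until the thread is not runnable */
	if (thread_stop(thread, TRUE)) {
		/* ... safely examine or manipulate the stopped thread ... */
		thread_unstop(thread);		/* release the stop request */
	}
}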
-extern void            thread_go_locked(
-                                               thread_t        thread,
-                                               int             result);
+/* Unblock thread on wake up */
+extern boolean_t       thread_unblock(
+                                               thread_t                thread,
+                                               wait_result_t   wresult);
 
-/* Stop old thread and run new thread */
-extern boolean_t thread_invoke(
-                                               thread_t        old_thread,
-                                               thread_t        new_thread,
-                                               int                     reason,
-                                               void        (*continuation)(void));
+/* Unblock and dispatch thread */
+extern kern_return_t   thread_go(
+                                                       thread_t                thread,
+                                                       wait_result_t   wresult);
 
-/* Called when current thread is given new stack */
-extern void            thread_continue(
-                                               thread_t        old_thread);
+/* Handle threads at context switch */
+extern void                    thread_dispatch(
+                                               thread_t                old_thread,
+                                               thread_t                new_thread);
 
 /* Switch directly to a particular thread */
-extern int             thread_run(
-                                               thread_t        old_thread,
-                                               void            (*continuation)(void),
-                                               thread_t        new_thread);
+extern int                     thread_run(
+                                               thread_t                        self,
+                                               thread_continue_t       continuation,
+                                               void                            *parameter,
+                                               thread_t                        new_thread);
 
-/* Dispatch a thread not on a run queue */
-extern void            thread_dispatch(
-                                               thread_t        thread);
+/* Resume thread with new stack */
+extern void                    thread_continue(
+                                               thread_t                old_thread);
 
 /* Invoke continuation */
 extern void            call_continuation(
-                                               void            (*continuation)(void));
-
-/* Compute effective priority of the specified thread */
-extern void            compute_priority(
-                                               thread_t        thread,
-                                               int                     resched);
-
-/* Version of compute_priority for current thread or
- * thread being manipulated by scheduler.
- */
-extern void            compute_my_priority(
-                                               thread_t        thread);
+                                       thread_continue_t       continuation,
+                                       void                            *parameter,
+                                       wait_result_t           wresult);
+
+/* Set the current scheduled priority */
+extern void            set_sched_pri(
+                                       thread_t                thread,
+                                       int                             priority);
+
+/* Set base priority of the specified thread */
+extern void            sched_set_thread_base_priority(
+                                       thread_t                thread,
+                                       int                             priority);
+
+/* Set the thread's true scheduling mode */
+extern void             sched_set_thread_mode(thread_t thread,
+                                              sched_mode_t mode);
+/* Demote the true scheduler mode */
+extern void             sched_thread_mode_demote(thread_t thread,
+                                                 uint32_t reason);
+/* Un-demote the true scheduler mode */
+extern void             sched_thread_mode_undemote(thread_t thread,
+                                                   uint32_t reason);
+
+/* Re-evaluate base priority of thread (thread locked) */
+void thread_recompute_priority(thread_t thread);
+
+/* Re-evaluate base priority of thread (thread unlocked) */
+void thread_recompute_qos(thread_t thread);
+
+/* Reset scheduled priority of thread */
+extern void            thread_recompute_sched_pri(
+                                       thread_t                thread,
+                                       boolean_t               override_depress);
 
 /* Periodic scheduler activity */
-extern void            sched_tick_init(void);
+extern void            sched_init_thread(void (*)(void));
+
+/* Perform sched_tick housekeeping activities */
+extern boolean_t               can_update_priority(
+                                       thread_t                thread);
 
-/* Update priority of thread that has been sleeping or suspended.
- * Used to "catch up" with the system.
- */
 extern void            update_priority(
-                                               thread_t        thread);
+                                                                                       thread_t                thread);
 
-/* Idle thread loop */
+extern void            lightweight_update_priority(
+                                                               thread_t                thread);
+
+extern void             sched_default_quantum_expire(thread_t thread);
+
+/* Idle processor thread */
 extern void            idle_thread(void);
 
-/*
- *     thread_sleep_interlock:
- *
- *     Cause the current thread to wait until the specified event
- *     occurs.  The specified HW interlock is unlocked before releasing
- *     the cpu.  (This is a convenient way to sleep without manually
- *     calling assert_wait).
- */
+extern kern_return_t   idle_thread_create(
+                                                       processor_t             processor);
 
-#define thread_sleep_interlock(event, lock, interruptible)     \
-MACRO_BEGIN                                                                                                    \
-       assert_wait(event, interruptible);                                              \
-       interlock_unlock(lock);                                                                 \
-       thread_block((void (*)(void)) 0);                                               \
-MACRO_END
+/* Continuation return from syscall */
+extern void     thread_syscall_return(
+                        kern_return_t   ret);
 
-/*
- *     Machine-dependent code must define these functions.
+/* Context switch */
+extern wait_result_t   thread_block_reason(
+                                                       thread_continue_t       continuation,
+                                                       void                            *parameter,
+                                                       ast_t                           reason);
+
+/* Reschedule thread for execution */
+extern void            thread_setrun(
+                                       thread_t        thread,
+                                       integer_t       options);
+
+typedef enum {
+       SCHED_NONE      = 0x0,
+       SCHED_TAILQ     = 0x1,
+       SCHED_HEADQ     = 0x2,
+       SCHED_PREEMPT   = 0x4,
+       SCHED_REBALANCE = 0x8,
+} sched_options_t;
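/*
 * Illustrative sketch (editorial): the sched_options_t values are bit flags
 * and may be OR-ed together when making a thread runnable via thread_setrun()
 * above. The caller is assumed to hold the thread locked; the helper name is
 * hypothetical.
 */
static void
example_make_runnable_urgently(thread_t thread)
{
	/* enqueue at the head of the runqueue and check for preemption */
	thread_setrun(thread, SCHED_HEADQ | SCHED_PREEMPT);
}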
+
+extern processor_set_t task_choose_pset(
+                                                       task_t                  task);
+
+/* Bind the current thread to a particular processor */
+extern processor_t             thread_bind(
+                                                       processor_t             processor);
+
+/* Choose the best processor to run a thread */
+extern processor_t     choose_processor(
+                                                                        processor_set_t                pset,
+                                                                        processor_t                    processor,
+                                                                        thread_t                       thread);
+
+extern void sched_SMT_balance(
+                             processor_t processor,
+                             processor_set_t pset);
+
+extern void thread_quantum_init(
+                                                               thread_t thread);
+
+extern void            run_queue_init(
+                                       run_queue_t             runq);
+
+extern thread_t        run_queue_dequeue(
+                                                         run_queue_t           runq,
+                                                         integer_t             options);
+
+extern boolean_t       run_queue_enqueue(
+                                                         run_queue_t           runq,
+                                                         thread_t                      thread,
+                                                         integer_t             options);
+
+extern void    run_queue_remove(
+                                                                        run_queue_t            runq,
+                                                                        thread_t                       thread);
+                                                                         
+struct sched_update_scan_context
+{
+       uint64_t        earliest_bg_make_runnable_time;
+       uint64_t        earliest_normal_make_runnable_time;
+       uint64_t        earliest_rt_make_runnable_time;
+};
+typedef struct sched_update_scan_context *sched_update_scan_context_t;
+
+extern void            sched_rtglobal_runq_scan(sched_update_scan_context_t scan_context);
+
+/* 
+ * Enum to define various events which need IPIs. The IPI policy 
+ * engine decides what kind of IPI to use based on destination 
+ * processor state, thread and one of the following scheduling events.
+ */
+typedef enum {
+       SCHED_IPI_EVENT_BOUND_THR   = 0x1,
+       SCHED_IPI_EVENT_PREEMPT     = 0x2,
+       SCHED_IPI_EVENT_SMT_REBAL   = 0x3,
+       SCHED_IPI_EVENT_SPILL       = 0x4,
+       SCHED_IPI_EVENT_REBALANCE   = 0x5,
+} sched_ipi_event_t;
+
+
+/* Enum to define various IPI types used by the scheduler */
+typedef enum {
+       SCHED_IPI_NONE              = 0x0,
+       SCHED_IPI_IMMEDIATE         = 0x1,
+       SCHED_IPI_IDLE              = 0x2,
+       SCHED_IPI_DEFERRED          = 0x3,
+} sched_ipi_type_t;
+
+/* The IPI policy engine behaves in the following manner:
+ * - All scheduler events which need an IPI invoke sched_ipi_action() with  
+ *   the appropriate destination processor, thread and event.
+ * - sched_ipi_action() performs basic checks, invokes the scheduler specific
+ *   ipi_policy routine and sets pending_AST bits based on the result.
+ * - Once the pset lock is dropped, the scheduler invokes sched_ipi_perform()
+ *   routine which actually sends the appropriate IPI to the destination core.
  */
+extern sched_ipi_type_t sched_ipi_action(processor_t dst, thread_t thread,
+         boolean_t dst_idle, sched_ipi_event_t event);
+extern void sched_ipi_perform(processor_t dst, sched_ipi_type_t ipi);
 
-/* Start thread running */
-extern void            thread_bootstrap_return(void);
+/* sched_ipi_policy() is the global default IPI policy for all schedulers */
+extern sched_ipi_type_t sched_ipi_policy(processor_t dst, thread_t thread,
+         boolean_t dst_idle, sched_ipi_event_t event);
 
-/* Return from exception */
-extern void            thread_exception_return(void);
+/* sched_ipi_deferred_policy() is the global default deferred IPI policy for all schedulers */
+extern sched_ipi_type_t sched_ipi_deferred_policy(processor_set_t pset,
+         processor_t dst, sched_ipi_event_t event);
 
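/*
 * Illustrative sketch (editorial) of the flow described above: decide on an
 * IPI under the pset lock, then deliver it after the lock is dropped.
 * Assumes pset_unlock() as the unlock primitive; names are hypothetical.
 */
static void
example_spill_ipi(processor_set_t pset, processor_t dst, thread_t thread)
{
	sched_ipi_type_t ipi_type;

	/* pset lock held: pick the IPI type and set pending_AST bits */
	ipi_type = sched_ipi_action(dst, thread, FALSE, SCHED_IPI_EVENT_SPILL);

	pset_unlock(pset);

	/* lock dropped: actually send the chosen IPI to the destination core */
	sched_ipi_perform(dst, ipi_type);
}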
-/* Continuation return from syscall */
-extern void     thread_syscall_return(
-                        kern_return_t   ret);
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
 
-extern thread_t        switch_context(
-                                               thread_t        old_thread,
-                                               void            (*continuation)(void),
-                                               thread_t        new_thread);
+extern boolean_t        thread_update_add_thread(thread_t thread);
+extern void             thread_update_process_threads(void);
+extern boolean_t        runq_scan(run_queue_t runq, sched_update_scan_context_t scan_context);
 
-/* Attach stack to thread */
-extern void            machine_kernel_stack_init(
-                                               thread_t        thread,
-                                               void            (*start_pos)(thread_t));
+extern void sched_timeshare_init(void);
+extern void sched_timeshare_timebase_init(void);
+extern void sched_timeshare_maintenance_continue(void);
 
-extern void            load_context(
-                                               thread_t        thread);
+extern boolean_t priority_is_urgent(int priority);
+extern uint32_t sched_timeshare_initial_quantum_size(thread_t thread);
 
-extern thread_act_t            switch_act(
-                                                       thread_act_t    act);
+extern int sched_compute_timeshare_priority(thread_t thread);
 
-extern void            machine_switch_act(
-                                                       thread_t                thread,
-                                                       thread_act_t    old,
-                                                       thread_act_t    new,
-                                                       int                             cpu);
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
-/*
- *     These functions are either defined in kern/thread.c
- *     or are defined directly by machine-dependent code.
- */
+/* Remove thread from its run queue */
+extern boolean_t       thread_run_queue_remove(thread_t thread);
+thread_t thread_run_queue_remove_for_handoff(thread_t thread);
+
+/* Put a thread back in the run queue after being yanked */
+extern void thread_run_queue_reinsert(thread_t thread, integer_t options);
+
+extern void            thread_timer_expire(
+                                       void                    *thread,
+                                       void                    *p1);
+
+extern boolean_t       thread_eager_preemption(
+                                               thread_t thread);
+
+extern boolean_t sched_generic_direct_dispatch_to_idle_processors;
+
+/* Set the maximum interrupt level for the thread */
+__private_extern__ wait_interrupt_t thread_interrupt_level(
+                                               wait_interrupt_t interruptible);
+
+__private_extern__ wait_result_t thread_mark_wait_locked(
+                                               thread_t                 thread,
+                                               wait_interrupt_t interruptible);
 
-/* Allocate an activation stack */
-extern vm_offset_t     stack_alloc(thread_t thread, void (*start_pos)(thread_t));
+/* Wake up locked thread directly, passing result */
+__private_extern__ kern_return_t clear_wait_internal(
+                                               thread_t                thread,
+                                               wait_result_t   result);
 
-/* Free an activation stack */
-extern void            stack_free(thread_t thread);
+extern void sched_stats_handle_csw(
+                                                       processor_t processor, 
+                                                       int reasons, 
+                                                       int selfpri, 
+                                                       int otherpri);
 
-/* Collect excess kernel stacks */
-extern void            stack_collect(void);
+extern void sched_stats_handle_runq_change(
+                                                                       struct runq_stats *stats, 
+                                                                       int old_count);
 
-extern void            set_pri(
+
+#if DEBUG
+
+#define        SCHED_STATS_CSW(processor, reasons, selfpri, otherpri)          \
+do {                                                           \
+       if (__builtin_expect(sched_stats_active, 0)) {  \
+               sched_stats_handle_csw((processor),             \
+                               (reasons), (selfpri), (otherpri));      \
+       }                                                       \
+} while (0) 
+
+
+#define SCHED_STATS_RUNQ_CHANGE(stats, old_count)              \
+do {                                                           \
+       if (__builtin_expect(sched_stats_active, 0)) {  \
+               sched_stats_handle_runq_change((stats),         \
+                                                               (old_count));           \
+       }                                                       \
+} while (0) 
+
+#else /* DEBUG */
+
+#define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri) do { } while (0)
+#define SCHED_STATS_RUNQ_CHANGE(stats, old_count) do { } while (0)
+
+#endif /* DEBUG */
+
+extern uint32_t sched_debug_flags;
+#define SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS  0x00000001
+#define SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS  0x00000002
+
+#define SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(...) do {                                           \
+               if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS)) { \
+                       KERNEL_DEBUG_CONSTANT(__VA_ARGS__);                                                     \
+               }                                                                                                                               \
+       } while(0)
+
+#define SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(...) do {                                           \
+               if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS)) { \
+                       KERNEL_DEBUG_CONSTANT(__VA_ARGS__);                                                     \
+               }                                                                                                                               \
+       } while(0)
+
+#define THREAD_URGENCY_NONE            0       /* indicates that there is no currently runnable thread */
+#define THREAD_URGENCY_BACKGROUND      1       /* indicates that the thread is marked as a "background" thread */
+#define THREAD_URGENCY_NORMAL          2       /* indicates that the thread is marked as a "normal" thread */
+#define THREAD_URGENCY_REAL_TIME       3       /* indicates that the thread is marked as a "real-time" or urgent thread */
+#define        THREAD_URGENCY_MAX              4       /* Marker */
+/* Returns the "urgency" of a thread (provided by scheduler) */
+extern int     thread_get_urgency(
                                        thread_t        thread,
-                                       int                     pri,
-                                       boolean_t       resched);
+                                       uint64_t        *rt_period,
+                                       uint64_t        *rt_deadline);
 
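/*
 * Illustrative sketch (editorial): querying a thread's urgency. The
 * out-parameters are only meaningful for real-time threads; the helper
 * name is hypothetical.
 */
static int
example_thread_urgency(thread_t thread)
{
	uint64_t rt_period, rt_deadline;
	int urgency = thread_get_urgency(thread, &rt_period, &rt_deadline);

	if (urgency == THREAD_URGENCY_REAL_TIME) {
		/* ... rt_period / rt_deadline describe the RT constraint ... */
	}
	return urgency;
}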
-/* Block current thread, indicating reason (Block or Quantum expiration) */
-extern int             thread_block_reason(
-                                               void            (*continuation)(void),
-                                               int             reason);
+/* Tells the "urgency" of the just scheduled thread (provided by CPU PM) */
+extern void    thread_tell_urgency(
+                                       int             urgency,
+                                       uint64_t        rt_period,
+                                       uint64_t        rt_deadline,
+                                       uint64_t        sched_latency,
+                                   thread_t nthread);
 
-/* Make thread runnable */
-extern void            thread_setrun(
-                                               thread_t        thread,
-                                               boolean_t       may_preempt,
-                                               boolean_t       tail);
-/*
- *     Flags for thread_setrun()
- */
+/* Tells if there are "active" RT threads in the system (provided by CPU PM) */
+extern void    active_rt_threads(
+                                       boolean_t       active);
 
-#define HEAD_Q         0               /* FALSE */
-#define TAIL_Q         1               /* TRUE */
+/* Returns the perfcontrol attribute for the thread */
+extern perfcontrol_class_t thread_get_perfcontrol_class(
+                                       thread_t        thread);
 
-/* Bind thread to a particular processor */
-extern void            thread_bind(
-                                               thread_t        thread,
-                                               processor_t     processor);
+#define PSET_LOAD_NUMERATOR_SHIFT   16
+#define PSET_LOAD_FRACTIONAL_SHIFT   4
 
-extern void            thread_mark_wait_locked(
-                                               thread_t        thread,
-                                               int                     interruptible);
+extern int sched_get_pset_load_average(processor_set_t pset);
+extern void sched_update_pset_load_average(processor_set_t pset);
+
+/* Generic routine for Non-AMP schedulers to calculate parallelism */
+extern uint32_t sched_qos_max_parallelism(int qos, uint64_t options);
 
 #endif /* MACH_KERNEL_PRIVATE */
 
+__BEGIN_DECLS
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/* Toggles a global override to turn off CPU Throttling */
+#define CPU_THROTTLE_DISABLE   0
+#define CPU_THROTTLE_ENABLE    1
+extern void    sys_override_cpu_throttle(int flag);
+
 /*
  ****************** Only exported until BSD stops using ********************
  */
 
-/*
- * Cancel a stop and continue the thread if necessary.
- */
-extern void            thread_unstop(
-                                               thread_t        thread);
+extern void                    thread_vm_bind_group_add(void);
 
 /* Wake up thread directly, passing result */
-extern void            clear_wait(
-                                               thread_t        thread,
-                                               int             result);
+extern kern_return_t clear_wait(
+                                               thread_t                thread,
+                                               wait_result_t   result);
 
-/* Bind thread to a particular processor */
-extern void            thread_bind(
-                                               thread_t        thread,
-                                               processor_t     processor);
+/* Start thread running */
+extern void            thread_bootstrap_return(void) __attribute__((noreturn));
 
+/* Return from exception (BSD-visible interface) */
+extern void            thread_exception_return(void) __dead2;
 
-/*
- * *********************   PUBLIC APIs ************************************
- */
+#define SCHED_STRING_MAX_LENGTH (48)
+/* String declaring the name of the current scheduler */
+extern char sched_string[SCHED_STRING_MAX_LENGTH];
 
-/* Set timer for current thread */
-extern void            thread_set_timer(
-                                       uint32_t                interval,
-                                       uint32_t                scale_factor);
+extern thread_t port_name_to_thread_for_ulock(mach_port_name_t thread_name);
 
-extern void            thread_set_timer_deadline(
-                                       uint64_t                deadline);
+/* Attempt to context switch to a specific runnable thread */
+extern wait_result_t thread_handoff(thread_t thread);
 
-extern void            thread_cancel_timer(void);
+extern struct waitq    *assert_wait_queue(event_t event);
 
-/*
- * thread_stop a thread then wait for it to stop (both of the above)
- */
-extern boolean_t       thread_stop_wait(
-                                               thread_t        thread);
+extern kern_return_t thread_wakeup_one_with_pri(event_t event, int priority);
 
-/* Declare thread will wait on a particular event */
-extern void            assert_wait(
-                                               event_t         event,
-                                               int             interruptflag);
+extern thread_t thread_wakeup_identify(event_t event, int priority);
 
-/* Assert that the thread intends to wait for a timeout */
-extern void            assert_wait_timeout(
-                                               natural_t       msecs,
-                                               int             interruptflags);
+#endif /* XNU_KERNEL_PRIVATE */
 
-/* Wake up thread (or threads) waiting on a particular event */
-extern void            thread_wakeup_prim(
-                                               event_t         event,
-                                               boolean_t       one_thread,
-                                               int             result);
+#ifdef KERNEL_PRIVATE
+/* Set pending block hint for a particular object before we go into a wait state */
+extern void            thread_set_pending_block_hint(
+                                                       thread_t                        thread,
+                                                       block_hint_t                    block_hint);
 
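/*
 * Illustrative sketch (editorial): recording a block hint just before
 * asserting a wait, so debugging and tracing tools can attribute the block.
 * kThreadWaitKernelMutex is assumed to come from <kern/block_hint.h>;
 * the helper name is hypothetical.
 */
static wait_result_t
example_wait_with_hint(event_t event)
{
	thread_set_pending_block_hint(current_thread(), kThreadWaitKernelMutex);
	return assert_wait(event, THREAD_UNINT);
}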
-/* Block current thread (Block reason) */
-extern int             thread_block(
-                                               void            (*continuation)(void));
+#define QOS_PARALLELISM_COUNT_LOGICAL   0x1
+#define QOS_PARALLELISM_REALTIME        0x2
+extern uint32_t qos_max_parallelism(int qos, uint64_t options);
 
+#endif /* KERNEL_PRIVATE */
 
-/*
- *     Routines defined as macros
- */
+/* Context switch */
+extern wait_result_t   thread_block(
+                                                       thread_continue_t       continuation);
+
+extern wait_result_t   thread_block_parameter(
+                                                       thread_continue_t       continuation,
+                                                       void                            *parameter);
+
+/* Declare thread will wait on a particular event */
+extern wait_result_t   assert_wait(
+                                                       event_t                         event,
+                                                       wait_interrupt_t        interruptible);
+
+/* Assert that the thread intends to wait with a timeout */
+extern wait_result_t   assert_wait_timeout(
+                                                       event_t                         event,
+                                                       wait_interrupt_t        interruptible,
+                                                       uint32_t                        interval,
+                                                       uint32_t                        scale_factor);
+
+/* Assert that the thread intends to wait with an urgency, timeout and leeway */
+extern wait_result_t   assert_wait_timeout_with_leeway(
+                                                       event_t                         event,
+                                                       wait_interrupt_t        interruptible,
+                                                       wait_timeout_urgency_t  urgency,
+                                                       uint32_t                        interval,
+                                                       uint32_t                        leeway,
+                                                       uint32_t                        scale_factor);
+
+extern wait_result_t   assert_wait_deadline(
+                                                       event_t                         event,
+                                                       wait_interrupt_t        interruptible,
+                                                       uint64_t                        deadline);
+
+/* Assert that the thread intends to wait with an urgency, deadline, and leeway */
+extern wait_result_t   assert_wait_deadline_with_leeway(
+                                                       event_t                         event,
+                                                       wait_interrupt_t        interruptible,
+                                                       wait_timeout_urgency_t  urgency,
+                                                       uint64_t                        deadline,
+                                                       uint64_t                        leeway);
+
+/* Wake up thread (or threads) waiting on a particular event */
+extern kern_return_t   thread_wakeup_prim(
+                                                       event_t                         event,
+                                                       boolean_t                       one_thread,
+                                                       wait_result_t                   result);
 
 #define thread_wakeup(x)                                       \
                        thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
@@ -334,45 +556,223 @@ extern int               thread_block(
 #define thread_wakeup_one(x)                           \
                        thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
 
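/*
 * Illustrative sketch (editorial) of the canonical wait/wakeup protocol
 * built from assert_wait()/thread_block() and thread_wakeup(): assert the
 * wait while the condition is still protected, drop the lock, then block.
 * The object, its lock, and the helper names are hypothetical.
 */
struct example_obj {
	lck_mtx_t	lock;
	boolean_t	flag;
};

static void
example_wait_for_flag(struct example_obj *obj)
{
	lck_mtx_lock(&obj->lock);
	while (!obj->flag) {
		assert_wait((event_t)&obj->flag, THREAD_UNINT);
		lck_mtx_unlock(&obj->lock);
		(void) thread_block(THREAD_CONTINUE_NULL);	/* may wake spuriously */
		lck_mtx_lock(&obj->lock);
	}
	lck_mtx_unlock(&obj->lock);
}

static void
example_post_flag(struct example_obj *obj)
{
	lck_mtx_lock(&obj->lock);
	obj->flag = TRUE;
	thread_wakeup((event_t)&obj->flag);	/* wakes all waiters on this event */
	lck_mtx_unlock(&obj->lock);
}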
-/*
- *     thread_sleep_mutex:
- *
- *     Cause the current thread to wait until the specified event
- *     occurs.  The specified mutex is unlocked before releasing
- *     the cpu.  (This is a convenient way to sleep without manually
- *     calling assert_wait).
- */
+/* Wakeup the specified thread if it is waiting on this event */
+extern kern_return_t thread_wakeup_thread(event_t event, thread_t thread);
 
-#define thread_sleep_mutex(event, lock, interruptible) \
-MACRO_BEGIN                                                                                            \
-       assert_wait(event, interruptible);                                      \
-       mutex_unlock(lock);                                                                     \
-       thread_block((void (*)(void)) 0);                                       \
-MACRO_END
+extern boolean_t preemption_enabled(void);
+
+#ifdef MACH_KERNEL_PRIVATE
 
 /*
- *     thread_sleep_simple_lock:
- *
- *     Cause the current thread to wait until the specified event
- *     occurs.  The specified simple_lock is unlocked before releasing
- *     the cpu.  (This is a convenient way to sleep without manually
- *     calling assert_wait).
+ * Scheduler algorithm indirection. If only one algorithm is
+ * enabled at compile-time, a direct function call is used.
+ * If more than one is enabled, calls are dispatched through
+ * a function pointer table.
  */
 
-#define thread_sleep_simple_lock(event, lock, interruptible)   \
-MACRO_BEGIN                                                                                                            \
-       assert_wait(event, interruptible);                                                      \
-       simple_unlock(lock);                                                                            \
-       thread_block((void (*)(void)) 0);                                                       \
-MACRO_END
+#if   !defined(CONFIG_SCHED_TRADITIONAL) && !defined(CONFIG_SCHED_PROTO) && !defined(CONFIG_SCHED_GRRR) && !defined(CONFIG_SCHED_MULTIQ)
+#error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX
+#endif
+
+#if DEBUG
+#define SCHED(f) (sched_current_dispatch->f)
+#else /* DEBUG */
 
-#if            !defined(MACH_KERNEL_PRIVATE) && !defined(ABSOLUTETIME_SCALAR_TYPE)
+/* 
+ * For DEV & REL kernels, use a static dispatch table instead of 
+ * using the indirect function table.
+ */
+extern const struct sched_dispatch_table sched_multiq_dispatch;
+#define SCHED(f) (sched_multiq_dispatch.f)
+
+#endif /* DEBUG */
+
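/*
 * Illustrative sketch (editorial): scheduler-independent code reaches the
 * active algorithm's entry points through SCHED() without naming it.
 * MINPRI is assumed from <kern/sched.h>; the helper name is hypothetical.
 */
static thread_t
example_choose_next(processor_t processor, ast_t reason)
{
	/* dispatches to the active scheduler's choose_thread() entry point */
	return SCHED(choose_thread)(processor, MINPRI, reason);
}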
+struct sched_dispatch_table {
+       const char *sched_name;
+       void    (*init)(void);                          /* Init global state */
+       void    (*timebase_init)(void);         /* Timebase-dependent initialization */
+       void    (*processor_init)(processor_t processor);       /* Per-processor scheduler init */
+       void    (*pset_init)(processor_set_t pset);     /* Per-processor set scheduler init */
+
+       void    (*maintenance_continuation)(void);      /* Function called regularly */
+
+       /*
+        * Choose a thread of greater or equal priority from the per-processor
+        * runqueue for timeshare/fixed threads
+        */
+       thread_t        (*choose_thread)(
+                                                                 processor_t           processor,
+                                                                 int                           priority,
+                                                                 ast_t reason);
+
+       /* True if scheduler supports stealing threads */
+       boolean_t   steal_thread_enabled;
+
+       /*
+        * Steal a thread from another processor in the pset so that it can run
+        * immediately
+        */
+       thread_t        (*steal_thread)(
+                                                               processor_set_t         pset);
+
+       /*
+        * Compute priority for a timeshare thread based on base priority.
+        */
+       int (*compute_timeshare_priority)(thread_t thread);
+
+       /*
+        * Pick the best processor for a thread (any kind of thread) to run on.
+        */
+       processor_t     (*choose_processor)(
+                                                                                processor_set_t                pset,
+                                                                                processor_t                    processor,
+                                                                                thread_t                       thread);
+       /*
+        * Enqueue a timeshare or fixed priority thread onto the per-processor
+        * runqueue
+        */
+       boolean_t (*processor_enqueue)(
+                                                                processor_t                    processor,
+                                                                thread_t                       thread,
+                                                                integer_t                      options);
+
+       /* Migrate threads away in preparation for processor shutdown */
+       void (*processor_queue_shutdown)(
+                                                                        processor_t                    processor);
+
+       /* Remove the specific thread from the per-processor runqueue */
+       boolean_t       (*processor_queue_remove)(
+                                                                       processor_t             processor,
+                                                                       thread_t                thread);
+
+       /*
+        * Does the per-processor runqueue have any timeshare or fixed priority
+        * threads on it? Called without pset lock held, so should
+        * not assume immutability while executing.
+        */
+       boolean_t       (*processor_queue_empty)(processor_t            processor);
+
+       /*
+        * Would this priority trigger an urgent preemption if it's sitting
+        * on the per-processor runqueue?
+        */
+       boolean_t       (*priority_is_urgent)(int priority);
+
+       /*
+        * Does the per-processor runqueue contain runnable threads that
+        * should cause the currently-running thread to be preempted?
+        */
+       ast_t           (*processor_csw_check)(processor_t processor);
+
+       /*
+        * Does the per-processor runqueue contain a runnable thread
+        * of > or >= priority, as a preflight for choose_thread() or other
+        * thread selection
+        */
+       boolean_t       (*processor_queue_has_priority)(processor_t             processor,
+                                                                                               int                             priority,
+                                                                                               boolean_t               gte);
+
+       /* Quantum size for the specified non-realtime thread. */
+       uint32_t        (*initial_quantum_size)(thread_t thread);
+       
+       /* Scheduler mode for a new thread */
+       sched_mode_t    (*initial_thread_sched_mode)(task_t parent_task);
+
+       /*
+        * Is it safe to call update_priority, which may change a thread's
+        * runqueue or other state. This can be used to throttle changes
+        * to dynamic priority.
+        */
+       boolean_t       (*can_update_priority)(thread_t thread);
+
+       /*
+        * Update both scheduled priority and other persistent state.
+        * Side effects may include migration to another processor's runqueue.
+        */
+       void            (*update_priority)(thread_t thread);
+
+       /* Lower overhead update to scheduled priority and state. */
+       void            (*lightweight_update_priority)(thread_t thread);
+
+       /* Callback for non-realtime threads when the quantum timer fires */
+       void            (*quantum_expire)(thread_t thread);
+
+       /*
+        * Runnable threads on per-processor runqueue. Should only
+        * be used for relative comparisons of load between processors.
+        */
+       int                     (*processor_runq_count)(processor_t     processor);
+
+       /* Aggregate runcount statistics for per-processor runqueue */
+       uint64_t    (*processor_runq_stats_count_sum)(processor_t   processor);
+
+       boolean_t       (*processor_bound_count)(processor_t processor);
+
+       void            (*thread_update_scan)(sched_update_scan_context_t scan_context);
+
+       /*
+        * Use processor->next_thread to pin a thread to an idle
+        * processor. If FALSE, threads are enqueued and can
+        * be stolen by other processors.
+        */
+       boolean_t   direct_dispatch_to_idle_processors;
+
+       /* Supports more than one pset */
+       boolean_t   multiple_psets_enabled;
+       /* Supports scheduler groups */
+       boolean_t   sched_groups_enabled;
+
+       /* Supports avoid-processor */
+       boolean_t   avoid_processor_enabled;
+
+       /* Returns true if this processor should avoid running this thread. */
+       bool    (*thread_avoid_processor)(processor_t processor, thread_t thread);
+
+       /*
+        * Invoked when a processor is about to choose the idle thread
+        * Used to send IPIs to a processor which would be preferred to be idle instead.
+        * Called with pset lock held, returns pset lock unlocked.
+        */
+       void    (*processor_balance)(processor_t processor, processor_set_t pset);
+       rt_queue_t      (*rt_runq)(processor_set_t pset);
+       void    (*rt_init)(processor_set_t pset);
+       void    (*rt_queue_shutdown)(processor_t processor);
+       void    (*rt_runq_scan)(sched_update_scan_context_t scan_context);
+       int64_t (*rt_runq_count_sum)(void);
+
+       uint32_t (*qos_max_parallelism)(int qos, uint64_t options);
+       void    (*check_spill)(processor_set_t pset, thread_t thread);
+       sched_ipi_type_t (*ipi_policy)(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event);
+       bool    (*thread_should_yield)(processor_t processor, thread_t thread);
+};
+
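/*
 * Illustrative sketch (editorial): a scheduler advertises its entry points
 * by exporting one of these tables. Abbreviated and hypothetical; a real
 * table must populate every function pointer the core scheduler invokes.
 */
static void sched_example_init(void);
static void sched_example_timebase_init(void);

static const struct sched_dispatch_table sched_example_dispatch = {
	.sched_name    = "example",
	.init          = sched_example_init,
	.timebase_init = sched_example_timebase_init,
	/* ... remaining entry points omitted for brevity ... */
};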
+#if defined(CONFIG_SCHED_TRADITIONAL)
+extern const struct sched_dispatch_table sched_traditional_dispatch;
+extern const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch;
+#endif
 
-#include <libkern/OSBase.h>
+#if defined(CONFIG_SCHED_MULTIQ)
+extern const struct sched_dispatch_table sched_multiq_dispatch;
+extern const struct sched_dispatch_table sched_dualq_dispatch;
+#endif
 
-#define thread_set_timer_deadline(a)   \
-       thread_set_timer_deadline(__OSAbsoluteTime(a))
+#if defined(CONFIG_SCHED_PROTO)
+extern const struct sched_dispatch_table sched_proto_dispatch;
+#endif
 
+#if defined(CONFIG_SCHED_GRRR)
+extern const struct sched_dispatch_table sched_grrr_dispatch;
 #endif
 
+/*
+ * It is an error to invoke any scheduler-related code
+ * before this is set up
+ */
+extern const struct sched_dispatch_table *sched_current_dispatch;
+
+#endif /* MACH_KERNEL_PRIVATE */
+
+__END_DECLS
+
 #endif /* _KERN_SCHED_PRIM_H_ */