/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <kern/kern_types.h>
#include <kern/thread.h>
#include <sys/cdefs.h>
+#include <kern/block_hint.h>
#ifdef MACH_KERNEL_PRIVATE
+#include <mach/branch_predicates.h>
+
/* Initialization */
-extern void sched_init(void) __attribute__((section("__TEXT, initcode")));
+extern void sched_init(void);
extern void sched_startup(void);
extern void sched_timebase_init(void);
+extern void pset_rt_init(processor_set_t pset); /* NOTE(review): presumably per-pset real-time setup -- confirm against the definition */
+
+extern void sched_rtglobal_init(processor_set_t pset); /* same signature as sched_dispatch_table.rt_init */
+
+extern rt_queue_t sched_rtglobal_runq(processor_set_t pset); /* same signature as sched_dispatch_table.rt_runq */
+
+extern void sched_rtglobal_queue_shutdown(processor_t processor); /* same signature as sched_dispatch_table.rt_queue_shutdown */
+
+extern int64_t sched_rtglobal_runq_count_sum(void); /* same signature as sched_dispatch_table.rt_runq_count_sum */
+
+extern void sched_check_spill(processor_set_t pset, thread_t thread); /* same signature as sched_dispatch_table.check_spill */
+
+extern bool sched_thread_should_yield(processor_t processor, thread_t thread); /* same signature as sched_dispatch_table.thread_should_yield */
+
/* Force a preemption point for a thread and wait for it to stop running */
extern boolean_t thread_stop(
- thread_t thread);
+ thread_t thread,
+ boolean_t until_not_runnable);
/* Release a previous stop request */
extern void thread_unstop(
/* Wait for a thread to stop running */
extern void thread_wait(
- thread_t thread);
+ thread_t thread,
+ boolean_t until_not_runnable);
/* Unblock thread on wake up */
extern boolean_t thread_unblock(
int priority);
/* Set base priority of the specified thread */
-extern void set_priority(
+extern void sched_set_thread_base_priority(
thread_t thread,
int priority);
+/* Set the thread's true scheduling mode */
+extern void sched_set_thread_mode(thread_t thread,
+ sched_mode_t mode);
+/* Demote the true scheduler mode */
+extern void sched_thread_mode_demote(thread_t thread,
+ uint32_t reason);
+/* Un-demote the true scheduler mode */
+extern void sched_thread_mode_undemote(thread_t thread,
+ uint32_t reason);
+
+/* Re-evaluate base priority of thread (thread locked) */
+void thread_recompute_priority(thread_t thread);
+
+/* Re-evaluate base priority of thread (thread unlocked) */
+void thread_recompute_qos(thread_t thread);
+
/* Reset scheduled priority of thread */
-extern void compute_priority(
+extern void thread_recompute_sched_pri(
thread_t thread,
boolean_t override_depress);
-/* Adjust scheduled priority of thread during execution */
-extern void compute_my_priority(
- thread_t thread);
-
/* Periodic scheduler activity */
-extern void sched_tick_thread(void);
+extern void sched_init_thread(void (*)(void));
/* Perform sched_tick housekeeping activities */
-extern void update_priority(
+extern boolean_t can_update_priority(
thread_t thread);
+extern void update_priority(
+ thread_t thread);
+
+extern void lightweight_update_priority(
+ thread_t thread);
+
+extern void sched_default_quantum_expire(thread_t thread);
+
/* Idle processor thread */
extern void idle_thread(void);
thread_t thread,
integer_t options);
-#define SCHED_TAILQ 1
-#define SCHED_HEADQ 2
-#define SCHED_PREEMPT 4
+typedef enum { /* Scheduling option bits; replaces the former SCHED_TAILQ/SCHED_HEADQ/SCHED_PREEMPT #defines with unchanged values */
+ SCHED_NONE = 0x0, /* no option bits set */
+ SCHED_TAILQ = 0x1, /* presumably: enqueue at the tail of the run queue -- confirm */
+ SCHED_HEADQ = 0x2, /* presumably: enqueue at the head of the run queue -- confirm */
+ SCHED_PREEMPT = 0x4,
+ SCHED_REBALANCE = 0x8, /* new bit; it had no #define counterpart */
+} sched_options_t; /* NOTE(review): prototypes in this header still take `integer_t options`, not sched_options_t */
extern processor_set_t task_choose_pset(
task_t task);
extern processor_t thread_bind(
processor_t processor);
+/* Choose the best processor to run a thread */
+extern processor_t choose_processor(
+ processor_set_t pset,
+ processor_t processor,
+ thread_t thread);
+
+extern void sched_SMT_balance(
+ processor_t processor,
+ processor_set_t pset);
+
+extern void thread_quantum_init(
+ thread_t thread);
+
extern void run_queue_init(
run_queue_t runq);
+extern thread_t run_queue_dequeue(
+ run_queue_t runq,
+ integer_t options);
+
+extern boolean_t run_queue_enqueue(
+ run_queue_t runq,
+ thread_t thread,
+ integer_t options);
+
+extern void run_queue_remove(
+ run_queue_t runq,
+ thread_t thread);
+
+struct sched_update_scan_context /* Scan state passed by pointer to runq_scan(), sched_rtglobal_runq_scan() and the thread_update_scan hook */
+{
+ uint64_t earliest_bg_make_runnable_time; /* presumably the earliest make-runnable timestamp seen for background threads -- confirm in the scan code */
+ uint64_t earliest_normal_make_runnable_time; /* presumably the same for normal threads -- confirm */
+ uint64_t earliest_rt_make_runnable_time; /* presumably the same for real-time threads -- confirm */
+};
+typedef struct sched_update_scan_context *sched_update_scan_context_t; /* pointer typedef: functions taking this receive the context by reference */
+
+extern void sched_rtglobal_runq_scan(sched_update_scan_context_t scan_context);
+
+/*
+ * Enum to define various events which need IPIs. The IPI policy
+ * engine decides what kind of IPI to use based on destination
+ * processor state, thread and one of the following scheduling events.
+ *
+ * The values are consecutive identifiers (0x1..0x5), not bit flags;
+ * sched_ipi_action() and the policy routines take exactly one of them.
+ */
+typedef enum {
+ SCHED_IPI_EVENT_BOUND_THR = 0x1,
+ SCHED_IPI_EVENT_PREEMPT = 0x2,
+ SCHED_IPI_EVENT_SMT_REBAL = 0x3,
+ SCHED_IPI_EVENT_SPILL = 0x4,
+ SCHED_IPI_EVENT_REBALANCE = 0x5,
+} sched_ipi_event_t;
+
+
+/* Enum to define various IPI types used by the scheduler; returned by sched_ipi_action() and the ipi policy routines, and passed to sched_ipi_perform(). Values are consecutive identifiers, not bit flags. */
+typedef enum {
+ SCHED_IPI_NONE = 0x0,
+ SCHED_IPI_IMMEDIATE = 0x1,
+ SCHED_IPI_IDLE = 0x2,
+ SCHED_IPI_DEFERRED = 0x3,
+} sched_ipi_type_t;
+
+/* The IPI policy engine behaves in the following manner:
+ * - All scheduler events which need an IPI invoke sched_ipi_action() with
+ * the appropriate destination processor, thread and event.
+ * - sched_ipi_action() performs basic checks, invokes the scheduler specific
+ * ipi_policy routine and sets pending_AST bits based on the result.
+ * - Once the pset lock is dropped, the scheduler invokes sched_ipi_perform()
+ * routine which actually sends the appropriate IPI to the destination core.
+ */
+extern sched_ipi_type_t sched_ipi_action(processor_t dst, thread_t thread,
+ boolean_t dst_idle, sched_ipi_event_t event);
+extern void sched_ipi_perform(processor_t dst, sched_ipi_type_t ipi);
+
+/* sched_ipi_policy() is the global default IPI policy for all schedulers */
+extern sched_ipi_type_t sched_ipi_policy(processor_t dst, thread_t thread,
+ boolean_t dst_idle, sched_ipi_event_t event);
+
+/* sched_ipi_deferred_policy() is the global default deferred IPI policy for all schedulers */
+extern sched_ipi_type_t sched_ipi_deferred_policy(processor_set_t pset,
+ processor_t dst, sched_ipi_event_t event);
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+
+extern boolean_t thread_update_add_thread(thread_t thread);
+extern void thread_update_process_threads(void);
+extern boolean_t runq_scan(run_queue_t runq, sched_update_scan_context_t scan_context);
+
+extern void sched_timeshare_init(void);
+extern void sched_timeshare_timebase_init(void);
+extern void sched_timeshare_maintenance_continue(void);
+
+extern boolean_t priority_is_urgent(int priority);
+extern uint32_t sched_timeshare_initial_quantum_size(thread_t thread);
+
+extern int sched_compute_timeshare_priority(thread_t thread);
+
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+/* Remove thread from its run queue */
+extern boolean_t thread_run_queue_remove(thread_t thread);
+thread_t thread_run_queue_remove_for_handoff(thread_t thread);
+
+/* Put a thread back in the run queue after being yanked */
+extern void thread_run_queue_reinsert(thread_t thread, integer_t options);
+
extern void thread_timer_expire(
void *thread,
void *p1);
+extern boolean_t thread_eager_preemption(
+ thread_t thread);
+
+extern boolean_t sched_generic_direct_dispatch_to_idle_processors;
+
/* Set the maximum interrupt level for the thread */
__private_extern__ wait_interrupt_t thread_interrupt_level(
wait_interrupt_t interruptible);
thread_t thread,
wait_result_t result);
+extern void sched_stats_handle_csw(
+ processor_t processor,
+ int reasons,
+ int selfpri,
+ int otherpri);
+
+extern void sched_stats_handle_runq_change(
+ struct runq_stats *stats,
+ int old_count);
+
+
+#if DEBUG /* statistics hooks are compiled in only when DEBUG is set */
+
+#define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri) \
+do { \
+ if (__builtin_expect(sched_stats_active, 0)) { /* hinted as the unlikely case */ \
+ sched_stats_handle_csw((processor), \
+ (reasons), (selfpri), (otherpri)); \
+ } \
+} while (0)
+
+
+#define SCHED_STATS_RUNQ_CHANGE(stats, old_count) \
+do { \
+ if (__builtin_expect(sched_stats_active, 0)) { /* hinted as the unlikely case */ \
+ sched_stats_handle_runq_change((stats), \
+ (old_count)); \
+ } \
+} while (0)
+
+#else /* !DEBUG: both macros expand to an empty statement, so their arguments are never evaluated */
+
+#define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri) do { }while(0)
+#define SCHED_STATS_RUNQ_CHANGE(stats, old_count) do { }while(0)
+
+#endif /* DEBUG */
+
+extern uint32_t sched_debug_flags; /* bitmask tested against the SCHED_DEBUG_FLAG_* values below */
+#define SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS 0x00000001 /* gates SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT() */
+#define SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS 0x00000002 /* gates SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT() */
+
+#define SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(...) do { \
+ if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS)) { \
+ KERNEL_DEBUG_CONSTANT(__VA_ARGS__); /* arguments are forwarded unchanged */ \
+ } \
+ } while(0)
+
+#define SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(...) do { \
+ if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS)) { \
+ KERNEL_DEBUG_CONSTANT(__VA_ARGS__); /* arguments are forwarded unchanged */ \
+ } \
+ } while(0)
+
+#define THREAD_URGENCY_NONE 0 /* indicates that there is no currently runnable thread */
+#define THREAD_URGENCY_BACKGROUND 1 /* indicates that the thread is marked as a "background" thread */
+#define THREAD_URGENCY_NORMAL 2 /* indicates that the thread is marked as a "normal" thread */
+#define THREAD_URGENCY_REAL_TIME 3 /* indicates that the thread is marked as a "real-time" or urgent thread */
+#define THREAD_URGENCY_MAX 4 /* Marker: one more than the highest urgency value above */
+/* Returns the "urgency" of a thread (provided by scheduler) */
+extern int thread_get_urgency(
+ thread_t thread,
+ uint64_t *rt_period,
+ uint64_t *rt_deadline);
+
+/* Tells the "urgency" of the just scheduled thread (provided by CPU PM) */
+extern void thread_tell_urgency(
+ int urgency,
+ uint64_t rt_period,
+ uint64_t rt_deadline,
+ uint64_t sched_latency,
+ thread_t nthread);
+
+/* Tells if there are "active" RT threads in the system (provided by CPU PM) */
+extern void active_rt_threads(
+ boolean_t active);
+
+/* Returns the perfcontrol attribute for the thread */
+extern perfcontrol_class_t thread_get_perfcontrol_class(
+ thread_t thread);
+
+#define PSET_LOAD_NUMERATOR_SHIFT 16
+#define PSET_LOAD_FRACTIONAL_SHIFT 4
+
+extern int sched_get_pset_load_average(processor_set_t pset);
+extern void sched_update_pset_load_average(processor_set_t pset);
+
+/* Generic routine for Non-AMP schedulers to calculate parallelism */
+extern uint32_t sched_qos_max_parallelism(int qos, uint64_t options);
+
#endif /* MACH_KERNEL_PRIVATE */
__BEGIN_DECLS
#ifdef XNU_KERNEL_PRIVATE
-extern boolean_t assert_wait_possible(void);
+/* Toggles a global override to turn off CPU Throttling */
+#define CPU_THROTTLE_DISABLE 0
+#define CPU_THROTTLE_ENABLE 1
+extern void sys_override_cpu_throttle(int flag);
/*
****************** Only exported until BSD stops using ********************
*/
+extern void thread_vm_bind_group_add(void);
+
/* Wake up thread directly, passing result */
extern kern_return_t clear_wait(
thread_t thread,
wait_result_t result);
/* Start thread running */
-extern void thread_bootstrap_return(void);
+extern void thread_bootstrap_return(void) __attribute__((noreturn));
/* Return from exception (BSD-visible interface) */
extern void thread_exception_return(void) __dead2;
+#define SCHED_STRING_MAX_LENGTH (48)
+/* String declaring the name of the current scheduler */
+extern char sched_string[SCHED_STRING_MAX_LENGTH];
+
+extern thread_t port_name_to_thread_for_ulock(mach_port_name_t thread_name);
+
+/* Attempt to context switch to a specific runnable thread */
+extern wait_result_t thread_handoff(thread_t thread);
+
+extern struct waitq *assert_wait_queue(event_t event);
+
+extern kern_return_t thread_wakeup_one_with_pri(event_t event, int priority);
+
+extern thread_t thread_wakeup_identify(event_t event, int priority);
+
#endif /* XNU_KERNEL_PRIVATE */
+#ifdef KERNEL_PRIVATE
+/* Set pending block hint for a particular object before we go into a wait state */
+extern void thread_set_pending_block_hint(
+ thread_t thread,
+ block_hint_t block_hint);
+
+#define QOS_PARALLELISM_COUNT_LOGICAL 0x1 /* distinct bit; presumably OR-ed into the `options` argument of qos_max_parallelism() -- confirm */
+#define QOS_PARALLELISM_REALTIME 0x2 /* distinct bit; presumably OR-ed into the `options` argument of qos_max_parallelism() -- confirm */
+extern uint32_t qos_max_parallelism(int qos, uint64_t options); /* same parameter list as sched_qos_max_parallelism() and the dispatch-table hook of the same name */
+
+#endif /* KERNEL_PRIVATE */
+
/* Context switch */
extern wait_result_t thread_block(
thread_continue_t continuation);
uint32_t interval,
uint32_t scale_factor);
+/* Assert that the thread intends to wait with an urgency, timeout and leeway */
+extern wait_result_t assert_wait_timeout_with_leeway(
+ event_t event,
+ wait_interrupt_t interruptible,
+ wait_timeout_urgency_t urgency,
+ uint32_t interval,
+ uint32_t leeway,
+ uint32_t scale_factor);
+
extern wait_result_t assert_wait_deadline(
event_t event,
wait_interrupt_t interruptible,
uint64_t deadline);
+/* Assert that the thread intends to wait with an urgency, deadline, and leeway */
+extern wait_result_t assert_wait_deadline_with_leeway(
+ event_t event,
+ wait_interrupt_t interruptible,
+ wait_timeout_urgency_t urgency,
+ uint64_t deadline,
+ uint64_t leeway);
+
/* Wake up thread (or threads) waiting on a particular event */
extern kern_return_t thread_wakeup_prim(
event_t event,
boolean_t one_thread,
- wait_result_t result);
+ wait_result_t result);
#define thread_wakeup(x) \
thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
#define thread_wakeup_one(x) \
thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
-extern boolean_t preemption_enabled(void);
+/* Wakeup the specified thread if it is waiting on this event */
+extern kern_return_t thread_wakeup_thread(event_t event, thread_t thread);
-#ifdef KERNEL_PRIVATE
+extern boolean_t preemption_enabled(void);
-#ifndef __LP64__
+#ifdef MACH_KERNEL_PRIVATE
/*
- * Obsolete interfaces.
+ * Scheduler algorithm indirection. If only one algorithm is
+ * enabled at compile-time, a direct function call is used.
+ * If more than one is enabled, calls are dispatched through
+ * a function pointer table.
*/
-extern void thread_set_timer(
- uint32_t interval,
- uint32_t scale_factor);
+#if !defined(CONFIG_SCHED_TRADITIONAL) && !defined(CONFIG_SCHED_PROTO) && !defined(CONFIG_SCHED_GRRR) && !defined(CONFIG_SCHED_MULTIQ)
+#error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX
+#endif
-extern void thread_set_timer_deadline(
- uint64_t deadline);
+#if DEBUG
+#define SCHED(f) (sched_current_dispatch->f)
+#else /* DEBUG */
-extern void thread_cancel_timer(void);
-
-#ifndef MACH_KERNEL_PRIVATE
-
-#ifndef ABSOLUTETIME_SCALAR_TYPE
-
-#define thread_set_timer_deadline(a) \
- thread_set_timer_deadline(__OSAbsoluteTime(a))
+/*
+ * For DEV & REL kernels, use a static dispatch table instead of
+ * using the indirect function table.
+ *
+ * SCHED(f) then names member f of sched_multiq_dispatch directly,
+ * rather than going through the sched_current_dispatch pointer.
+ *
+ * NOTE(review): sched_multiq_dispatch is referenced unconditionally
+ * here, while its other declaration in this header is guarded by
+ * CONFIG_SCHED_MULTIQ -- confirm non-DEBUG builds always enable it.
+ */
+extern const struct sched_dispatch_table sched_multiq_dispatch;
+#define SCHED(f) (sched_multiq_dispatch.f)
+
+#endif /* DEBUG */
+
+struct sched_dispatch_table { /* Entry points and capability flags of one scheduler algorithm; members are reached through SCHED(f) */
+ const char *sched_name;
+ void (*init)(void); /* Init global state */
+ void (*timebase_init)(void); /* Timebase-dependent initialization */
+ void (*processor_init)(processor_t processor); /* Per-processor scheduler init */
+ void (*pset_init)(processor_set_t pset); /* Per-processor set scheduler init */
+
+ void (*maintenance_continuation)(void); /* Function called regularly */
+
+ /*
+ * Choose a thread of greater or equal priority from the per-processor
+ * runqueue for timeshare/fixed threads
+ */
+ thread_t (*choose_thread)(
+ processor_t processor,
+ int priority,
+ ast_t reason);
+
+ /* True if scheduler supports stealing threads */
+ boolean_t steal_thread_enabled;
+
+ /*
+ * Steal a thread from another processor in the pset so that it can run
+ * immediately
+ */
+ thread_t (*steal_thread)(
+ processor_set_t pset);
+
+ /*
+ * Compute priority for a timeshare thread based on base priority.
+ */
+ int (*compute_timeshare_priority)(thread_t thread);
+
+ /*
+ * Pick the best processor for a thread (any kind of thread) to run on.
+ */
+ processor_t (*choose_processor)(
+ processor_set_t pset,
+ processor_t processor,
+ thread_t thread);
+ /*
+ * Enqueue a timeshare or fixed priority thread onto the per-processor
+ * runqueue
+ */
+ boolean_t (*processor_enqueue)(
+ processor_t processor,
+ thread_t thread,
+ integer_t options);
+
+ /* Migrate threads away in preparation for processor shutdown */
+ void (*processor_queue_shutdown)(
+ processor_t processor);
+
+ /* Remove the specific thread from the per-processor runqueue */
+ boolean_t (*processor_queue_remove)(
+ processor_t processor,
+ thread_t thread);
+
+ /*
+ * Does the per-processor runqueue have any timeshare or fixed priority
+ * threads on it? Called without pset lock held, so should
+ * not assume immutability while executing.
+ */
+ boolean_t (*processor_queue_empty)(processor_t processor);
+
+ /*
+ * Would this priority trigger an urgent preemption if it's sitting
+ * on the per-processor runqueue?
+ */
+ boolean_t (*priority_is_urgent)(int priority);
+
+ /*
+ * Does the per-processor runqueue contain runnable threads that
+ * should cause the currently-running thread to be preempted?
+ */
+ ast_t (*processor_csw_check)(processor_t processor);
+
+ /*
+ * Does the per-processor runqueue contain a runnable thread
+ * of > or >= priority, as a preflight for choose_thread() or other
+ * thread selection
+ */
+ boolean_t (*processor_queue_has_priority)(processor_t processor,
+ int priority,
+ boolean_t gte);
+
+ /* Quantum size for the specified non-realtime thread. */
+ uint32_t (*initial_quantum_size)(thread_t thread);
+
+ /* Scheduler mode for a new thread */
+ sched_mode_t (*initial_thread_sched_mode)(task_t parent_task);
+
+ /*
+ * Is it safe to call update_priority, which may change a thread's
+ * runqueue or other state. This can be used to throttle changes
+ * to dynamic priority.
+ */
+ boolean_t (*can_update_priority)(thread_t thread);
+
+ /*
+ * Update both scheduled priority and other persistent state.
+ * Side effects may include migration to another processor's runqueue.
+ */
+ void (*update_priority)(thread_t thread);
+
+ /* Lower overhead update to scheduled priority and state. */
+ void (*lightweight_update_priority)(thread_t thread);
+
+ /* Callback for non-realtime threads when the quantum timer fires */
+ void (*quantum_expire)(thread_t thread);
+
+ /*
+ * Runnable threads on per-processor runqueue. Should only
+ * be used for relative comparisons of load between processors.
+ */
+ int (*processor_runq_count)(processor_t processor);
+
+ /* Aggregate runcount statistics for per-processor runqueue */
+ uint64_t (*processor_runq_stats_count_sum)(processor_t processor);
+
+ boolean_t (*processor_bound_count)(processor_t processor); /* NOTE(review): named as a count but returns boolean_t -- confirm intended return type */
+
+ void (*thread_update_scan)(sched_update_scan_context_t scan_context); /* receives the same scan context type as runq_scan() */
+
+ /*
+ * Use processor->next_thread to pin a thread to an idle
+ * processor. If FALSE, threads are enqueued and can
+ * be stolen by other processors.
+ */
+ boolean_t direct_dispatch_to_idle_processors;
+
+ /* Supports more than one pset */
+ boolean_t multiple_psets_enabled;
+ /* Supports scheduler groups */
+ boolean_t sched_groups_enabled;
+
+ /* Supports avoid-processor */
+ boolean_t avoid_processor_enabled;
+
+ /* Returns true if this processor should avoid running this thread. */
+ bool (*thread_avoid_processor)(processor_t processor, thread_t thread);
+
+ /*
+ * Invoked when a processor is about to choose the idle thread
+ * Used to send IPIs to a processor which would be preferred to be idle instead.
+ * Called with pset lock held, returns pset lock unlocked.
+ */
+ void (*processor_balance)(processor_t processor, processor_set_t pset);
+ rt_queue_t (*rt_runq)(processor_set_t pset); /* signature matches sched_rtglobal_runq() */
+ void (*rt_init)(processor_set_t pset); /* signature matches sched_rtglobal_init() */
+ void (*rt_queue_shutdown)(processor_t processor); /* signature matches sched_rtglobal_queue_shutdown() */
+ void (*rt_runq_scan)(sched_update_scan_context_t scan_context); /* signature matches sched_rtglobal_runq_scan() */
+ int64_t (*rt_runq_count_sum)(void); /* signature matches sched_rtglobal_runq_count_sum() */
+
+ uint32_t (*qos_max_parallelism)(int qos, uint64_t options); /* signature matches sched_qos_max_parallelism() */
+ void (*check_spill)(processor_set_t pset, thread_t thread); /* signature matches sched_check_spill() */
+ sched_ipi_type_t (*ipi_policy)(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event); /* scheduler-specific IPI policy invoked by sched_ipi_action(); sched_ipi_policy() is the global default */
+ bool (*thread_should_yield)(processor_t processor, thread_t thread); /* signature matches sched_thread_should_yield() */
+};
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+extern const struct sched_dispatch_table sched_traditional_dispatch;
+extern const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch;
+#endif
+
+#if defined(CONFIG_SCHED_MULTIQ)
+extern const struct sched_dispatch_table sched_multiq_dispatch;
+extern const struct sched_dispatch_table sched_dualq_dispatch;
+#endif
+
+#if defined(CONFIG_SCHED_PROTO)
+extern const struct sched_dispatch_table sched_proto_dispatch;
+#endif
+
+#if defined(CONFIG_SCHED_GRRR)
+extern const struct sched_dispatch_table sched_grrr_dispatch;
+#endif
-#endif /* ABSOLUTETIME_SCALAR_TYPE */
+/*
+ * It is an error to invoke any scheduler-related code
+ * before this is set up
+ */
+extern const struct sched_dispatch_table *sched_current_dispatch;
#endif /* MACH_KERNEL_PRIVATE */
-#endif /* __LP64__ */
-
-#endif /* KERNEL_PRIVATE */
-
__END_DECLS
#endif /* _KERN_SCHED_PRIM_H_ */