/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
#ifndef _KERN_SCHED_H_
#define _KERN_SCHED_H_
-#include <cpus.h>
-#include <simple_clock.h>
-#include <stat_time.h>
-
#include <mach/policy.h>
#include <kern/kern_types.h>
+#include <kern/smp.h>
#include <kern/queue.h>
-#include <kern/lock.h>
#include <kern/macro_help.h>
+#include <kern/timer_call.h>
+#include <kern/ast.h>
+#include <kern/kalloc.h>
+#include <kern/bits.h>
-#if STAT_TIME
-
-/*
- * Statistical timing uses microseconds as timer units. 16 bit shift
- * yields priorities. PRI_SHIFT_2 isn't needed.
- */
-#define PRI_SHIFT (16 - SCHED_TICK_SHIFT)
-
-#else /* STAT_TIME */
-
-/*
- * Otherwise machine provides shift(s) based on time units it uses.
- */
-#include <machine/sched_param.h>
-
-#endif /* STAT_TIME */
-
-#define NRQS 128 /* 128 run queues per cpu */
-#define NRQBM (NRQS / 32) /* number of run queue bit maps */
+#define NRQS 128 /* 128 levels per run queue */
#define MAXPRI (NRQS-1)
-#define MINPRI IDLEPRI /* lowest legal priority schedulable */
-#define IDLEPRI 0 /* idle thread priority */
-#define DEPRESSPRI MINPRI /* depress priority */
+#define MINPRI 0 /* lowest legal priority schedulable */
+#define IDLEPRI MINPRI /* idle thread priority (same value as MINPRI) */
+#define NOPRI -1 /* below MINPRI, so invalid_pri(NOPRI) is true; presumably a "no priority" marker -- confirm at use sites */
/*
* High-level priority assignments
* +
* V
* 80 Kernel mode only
- * 79 High priority
+ * 79 System high priority
* A
* +
* (16 levels)
* +
* V
- * 64 High priority
+ * 64 System high priority
* 63 Elevated priorities
* A
* +
*************************************************************************
*/
+#define BASEPRI_RTQUEUES (BASEPRI_REALTIME + 1) /* 97 */
#define BASEPRI_REALTIME (MAXPRI - (NRQS / 4) + 1) /* 96 */
-#define MAXPRI_STANDARD (BASEPRI_REALTIME - 1) /* 95 */
-
-#define MAXPRI_KERNBAND MAXPRI_STANDARD /* 95 */
-#define MINPRI_KERNBAND (MAXPRI_KERNBAND - (NRQS / 8) + 1) /* 80 */
-
-#define MAXPRI_HIGHBAND (MINPRI_KERNBAND - 1) /* 79 */
-#define MINPRI_HIGHBAND (MAXPRI_HIGHBAND - (NRQS / 8) + 1) /* 64 */
-
-#define MAXPRI_MAINBAND (MINPRI_HIGHBAND - 1) /* 63 */
-#define BASEPRI_DEFAULT (MAXPRI_MAINBAND - (NRQS / 4)) /* 31 */
-#define MINPRI_MAINBAND MINPRI /* 0 */
-
-#define MINPRI_STANDARD MINPRI_MAINBAND /* 0 */
+#define MAXPRI_KERNEL (BASEPRI_REALTIME - 1) /* 95: highest level below BASEPRI_REALTIME */
+#define BASEPRI_PREEMPT_HIGH (BASEPRI_PREEMPT + 1) /* 93 */
+#define BASEPRI_PREEMPT (MAXPRI_KERNEL - 3) /* 92 */
+#define BASEPRI_VM (BASEPRI_PREEMPT - 1) /* 91 */
+
+#define BASEPRI_KERNEL (MINPRI_KERNEL + 1) /* 81 */
+#define MINPRI_KERNEL (MAXPRI_KERNEL - (NRQS / 8) + 1) /* 80: kernel range is 80..95, NRQS / 8 = 16 levels */
+
+#define MAXPRI_RESERVED (MINPRI_KERNEL - 1) /* 79 */
+#define BASEPRI_GRAPHICS (MAXPRI_RESERVED - 3) /* 76 */
+#define MINPRI_RESERVED (MAXPRI_RESERVED - (NRQS / 8) + 1) /* 64: reserved range is 64..79, NRQS / 8 = 16 levels */
+
+#define MAXPRI_USER (MINPRI_RESERVED - 1) /* 63 */
+#define BASEPRI_CONTROL (BASEPRI_DEFAULT + 17) /* 48 */
+#define BASEPRI_FOREGROUND (BASEPRI_DEFAULT + 16) /* 47 */
+#define BASEPRI_BACKGROUND (BASEPRI_DEFAULT + 15) /* 46 */
+#define BASEPRI_USER_INITIATED (BASEPRI_DEFAULT + 6) /* 37 */
+#define BASEPRI_DEFAULT (MAXPRI_USER - (NRQS / 4)) /* 31 */
+#define MAXPRI_SUPPRESSED (BASEPRI_DEFAULT - 3) /* 28 */
+#define BASEPRI_UTILITY (BASEPRI_DEFAULT - 11) /* 20 */
+#define MAXPRI_THROTTLE (MINPRI + 4) /* 4 */
+#define MINPRI_USER MINPRI /* 0: user range is 0..63 */
+
+#define DEPRESSPRI MINPRI /* 0: depress priority (same value as MINPRI) */
+#define MAXPRI_PROMOTE (MAXPRI_KERNEL) /* 95: ceiling for mutex promotion */
+
+/* Type used for thread->sched_mode and saved_mode */
+typedef enum {
+ TH_MODE_NONE = 0, /* 0: unassigned, usually for saved_mode only */
+ TH_MODE_REALTIME, /* 1: time constraints supplied */
+ TH_MODE_FIXED, /* 2: use fixed priorities, no decay */
+ TH_MODE_TIMESHARE, /* 3: use timesharing algorithm */
+} sched_mode_t;
+
+/* Buckets used for load calculation */
+typedef enum {
+ TH_BUCKET_RUN = 0, /* All runnable threads */
+ TH_BUCKET_FIXPRI, /* Fixed-priority */
+ TH_BUCKET_SHARE_FG, /* Timeshare thread above BASEPRI_UTILITY */
+ TH_BUCKET_SHARE_UT, /* Timeshare thread between BASEPRI_UTILITY and MAXPRI_THROTTLE */
+ TH_BUCKET_SHARE_BG, /* Timeshare thread between MAXPRI_THROTTLE and MINPRI */
+ TH_BUCKET_MAX, /* bucket count (5), not a bucket; dimensions sched_pri_shifts[] and sched_run_buckets[] */
+} sched_bucket_t;
/*
* Macro to check for invalid priorities.
*/
#define invalid_pri(pri) ((pri) < MINPRI || (pri) > MAXPRI)
+struct runq_stats { /* statistics record embedded in struct run_queue, struct rt_queue and struct grrr_run_queue */
+ uint64_t count_sum; /* NOTE(review): presumably an accumulated sum of queue counts -- confirm against the code that updates it */
+ uint64_t last_change_timestamp; /* NOTE(review): presumably the time of the last count change; units not visible here -- confirm */
+};
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO)
+
struct run_queue {
- queue_head_t queues[NRQS]; /* one for each priority */
- decl_simple_lock_data(,lock) /* one lock for all queues */
- int bitmap[NRQBM]; /* run queue bitmap array */
int highq; /* highest runnable queue */
- int count; /* # of runnable threads */
+ bitmap_t bitmap[BITMAP_LEN(NRQS)]; /* run queue bitmap array, sized through BITMAP_LEN for NRQS levels */
+ int count; /* # of threads total */
+ int urgency; /* level of preemption urgency */
+ queue_head_t queues[NRQS]; /* one for each priority */
+
+ struct runq_stats runq_stats; /* statistics record; see struct runq_stats for its two fields */
};
-typedef struct run_queue *run_queue_t;
-#define RUN_QUEUE_NULL ((run_queue_t) 0)
+inline static void
+rq_bitmap_set(bitmap_t *map, u_int n) /* set bit n of map after asserting that n is below NRQS */
+{
+ assert(n < NRQS); /* n is unsigned, so this bounds it to 0..NRQS-1 */
+ bitmap_set(map, n); /* bitmap_set() is not defined in this file; presumably from <kern/bits.h> -- confirm */
+}
+
+inline static void
+rq_bitmap_clear(bitmap_t *map, u_int n) /* clear bit n of map after asserting that n is below NRQS */
+{
+ assert(n < NRQS); /* n is unsigned, so this bounds it to 0..NRQS-1 */
+ bitmap_clear(map, n); /* bitmap_clear() is not defined in this file; presumably from <kern/bits.h> -- confirm */
+}
+
+#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */
+
+struct rt_queue {
+ _Atomic int count; /* # of threads total */
+ queue_head_t queue; /* all runnable RT threads */
+#if __SMP__ /* the rt_lock member below exists only when __SMP__ is non-zero */
+ decl_simple_lock_data(,rt_lock)
+#endif
+ struct runq_stats runq_stats; /* statistics record; see struct runq_stats for its two fields */
+};
+typedef struct rt_queue *rt_queue_t; /* rt_queue_t is a pointer to struct rt_queue */
-#define csw_needed(thread, processor) ( \
- ((thread)->state & TH_SUSP) || \
- ((processor)->first_quantum? \
- ((processor)->runq.highq > (thread)->sched_pri || \
- (processor)->processor_set->runq.highq > (thread)->sched_pri) : \
- ((processor)->runq.highq >= (thread)->sched_pri || \
- (processor)->processor_set->runq.highq >= (thread)->sched_pri)) )
+#if defined(CONFIG_SCHED_GRRR_CORE)
/*
- * Scheduler routines.
+ * We map standard Mach priorities to an abstract scale that more properly
+ * indicates how we want processor time allocated under contention.
*/
+typedef uint8_t grrr_proportional_priority_t; /* 8-bit value on the abstract scale described above */
+typedef uint8_t grrr_group_index_t; /* 8-bit group index */
+
+#define NUM_GRRR_PROPORTIONAL_PRIORITIES 256 /* number of distinct uint8_t values */
+#define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255) /* largest uint8_t value */
+
+#if 0 /* alternative group count, compiled out */
+#define NUM_GRRR_GROUPS 8 /* log(256) */
+#endif
+
+#define NUM_GRRR_GROUPS 64 /* 256/4; the definition actually in effect */
+
+struct grrr_group {
+ queue_chain_t priority_order; /* next greatest weight group */
+ grrr_proportional_priority_t minpriority; /* NOTE(review): presumably the lowest proportional priority in this group -- confirm */
+ grrr_group_index_t index; /* NOTE(review): presumably this group's slot in grrr_run_queue.groups[] -- confirm */
+
+ queue_head_t clients; /* NOTE(review): presumably the queue of threads belonging to this group -- confirm */
+ int count; /* NOTE(review): presumably the number of entries on clients -- confirm */
+ uint32_t weight;
+#if 0 /* deferred_removal_weight is compiled out */
+ uint32_t deferred_removal_weight;
+#endif
+ uint32_t work;
+ thread_t current_client; /* NOTE(review): presumably the client currently selected from this group -- confirm */
+};
+
+struct grrr_run_queue {
+ int count;
+ uint32_t last_rescale_tick;
+ struct grrr_group groups[NUM_GRRR_GROUPS]; /* NUM_GRRR_GROUPS (64) group records stored inline */
+ queue_head_t sorted_group_list; /* NOTE(review): presumably links groups through grrr_group.priority_order -- confirm */
+ uint32_t weight;
+ grrr_group_t current_group; /* grrr_group_t is not defined in this file; presumably a struct grrr_group pointer -- confirm */
+
+ struct runq_stats runq_stats; /* statistics record; see struct runq_stats for its two fields */
+};
-/* Remove thread from its run queue */
-extern run_queue_t rem_runq(
- thread_t thread);
+#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
-/* Mach factor computation (in mach_factor.c) */
-extern void compute_mach_factor(void);
+extern int rt_runq_count(processor_set_t);
+extern void rt_runq_count_incr(processor_set_t);
+extern void rt_runq_count_decr(processor_set_t);
+
+#if defined(CONFIG_SCHED_MULTIQ)
+sched_group_t sched_group_create(void);
+void sched_group_destroy(sched_group_t sched_group);
+#endif /* defined(CONFIG_SCHED_MULTIQ) */
-/* Update threads quantum (in priority.c) */
-extern void thread_quantum_update(
- int mycpu,
- thread_t thread,
- int nticks,
- int state);
-extern int min_quantum; /* defines max context switch rate */
/*
- * Shift structures for holding update shifts. Actual computation
- * is usage = (usage >> shift1) +/- (usage >> abs(shift2)) where the
- * +/- is determined by the sign of shift 2.
+ * Scheduler routines.
*/
-struct shift {
- int shift1;
- int shift2;
-};
-typedef struct shift *shift_t, shift_data_t;
+/* Handle quantum expiration for an executing thread */
+extern void thread_quantum_expire(
+ timer_call_param_t processor,
+ timer_call_param_t thread);
+
+/* Context switch check for current processor */
+extern ast_t csw_check(processor_t processor,
+ ast_t check_reason);
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+extern uint32_t std_quantum, min_std_quantum;
+extern uint32_t std_quantum_us;
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+extern uint32_t thread_depress_time;
+extern uint32_t default_timeshare_computation;
+extern uint32_t default_timeshare_constraint;
+
+extern uint32_t max_rt_quantum, min_rt_quantum;
+
+extern int default_preemption_rate;
+extern int default_bg_preemption_rate;
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
/*
- * Age usage (1 << SCHED_TICK_SHIFT) times per second.
+ * Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second.
+ * Aging may be deferred during periods where all processors are idle
+ * and cumulatively applied during periods of activity.
*/
+#define SCHED_TICK_SHIFT 3
+#define SCHED_TICK_MAX_DELTA (8)
-extern unsigned sched_tick;
+extern unsigned sched_tick;
+extern uint32_t sched_tick_interval;
-#define SCHED_TICK_SHIFT 3
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+extern uint64_t sched_one_second_interval;
+
+/* Periodic computation of various averages */
+extern void compute_averages(uint64_t);
+
+extern void compute_averunnable(
+ void *nrun);
+
+extern void compute_stack_target(
+ void *arg);
+
+extern void compute_memory_pressure(
+ void *arg);
+
+extern void compute_pageout_gc_throttle(
+ void *arg);
-#define SCHED_SCALE 128
-#define SCHED_SHIFT 7
+extern void compute_pmap_gc_throttle(
+ void *arg);
/*
- * thread_timer_delta macro takes care of both thread timers.
+ * Conversion factor from usage
+ * to priority.
*/
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
-#define thread_timer_delta(thread) \
-MACRO_BEGIN \
- register unsigned delta; \
- \
- delta = 0; \
- TIMER_DELTA((thread)->system_timer, \
- (thread)->system_timer_save, delta); \
- TIMER_DELTA((thread)->user_timer, \
- (thread)->user_timer_save, delta); \
- (thread)->cpu_delta += delta; \
- (thread)->sched_delta += (delta * \
- (thread)->processor_set->sched_load); \
-MACRO_END
+#define MAX_LOAD (NRQS - 1)
+extern uint32_t sched_pri_shifts[TH_BUCKET_MAX];
+extern uint32_t sched_fixed_shift;
+extern int8_t sched_load_shifts[NRQS];
+extern uint32_t sched_decay_usage_age_factor;
+void sched_timeshare_consider_maintenance(uint64_t ctime);
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+void sched_consider_recommended_cores(uint64_t ctime, thread_t thread);
+
+extern int32_t sched_poll_yield_shift;
+extern uint64_t sched_safe_duration;
+
+extern uint32_t sched_load_average, sched_mach_factor;
+
+extern uint32_t avenrun[3], mach_factor[3];
+
+extern uint64_t max_unsafe_computation;
+extern uint64_t max_poll_computation;
+
+extern volatile uint32_t sched_run_buckets[TH_BUCKET_MAX];
+
+extern uint32_t sched_run_incr(thread_t thread);
+extern uint32_t sched_run_decr(thread_t thread);
-#if SIMPLE_CLOCK
/*
- * sched_usec is an exponential average of number of microseconds
- * in a second for clock drift compensation.
+ * thread_timer_delta macro takes care of both thread timers.
*/
-
-extern int sched_usec;
-#endif /* SIMPLE_CLOCK */
+#define thread_timer_delta(thread, delta) /* stores the system-timer delta plus the user-timer delta into delta */ \
+MACRO_BEGIN \
+ (delta) = (typeof(delta))timer_delta(&(thread)->system_timer, /* plain assignment: any prior value of delta is overwritten */ \
+ &(thread)->system_timer_save); \
+ (delta) += (typeof(delta))timer_delta(&(thread)->user_timer, /* delta is expanded more than once; pass a side-effect-free lvalue */ \
+ &(thread)->user_timer_save); \
+MACRO_END
#endif /* _KERN_SCHED_H_ */