git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/sched.h
xnu-4570.41.2.tar.gz
[apple/xnu.git] / osfmk / kern / sched.h
index 9c98bd1041086ae05afd9a47468e8fa092d424f3..d8fe8ee3669112144c1ed97582f6cf754d6a2924 100644 (file)
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
 #ifndef        _KERN_SCHED_H_
 #define _KERN_SCHED_H_
 
-#include <cpus.h>
-#include <simple_clock.h>
-#include <stat_time.h>
-
 #include <mach/policy.h>
 #include <kern/kern_types.h>
+#include <kern/smp.h>
 #include <kern/queue.h>
-#include <kern/lock.h>
 #include <kern/macro_help.h>
+#include <kern/timer_call.h>
+#include <kern/ast.h>
+#include <kern/kalloc.h>
+#include <kern/bits.h>
 
-#if    STAT_TIME
-
-/*
- *     Statistical timing uses microseconds as timer units.  16 bit shift
- *     yields priorities.  PRI_SHIFT_2 isn't needed.
- */
-#define PRI_SHIFT      (16 - SCHED_TICK_SHIFT)
-
-#else  /* STAT_TIME */
-
-/*
- *     Otherwise machine provides shift(s) based on time units it uses.
- */
-#include <machine/sched_param.h>
-
-#endif /* STAT_TIME */
-
-#define        NRQS            128                             /* 128 run queues per cpu */
-#define NRQBM          (NRQS / 32)             /* number of run queue bit maps */
+#define        NRQS            128                             /* 128 levels per run queue */
 
 #define MAXPRI         (NRQS-1)
-#define MINPRI         IDLEPRI                 /* lowest legal priority schedulable */
-#define        IDLEPRI         0                               /* idle thread priority */
-#define DEPRESSPRI     MINPRI                  /* depress priority */
+#define MINPRI         0                               /* lowest legal priority schedulable */
+#define        IDLEPRI         MINPRI                          /* idle thread priority */
+#define        NOPRI           -1                              /* below MINPRI, so invalid_pri(NOPRI) is true */
 
 /*
  *     High-level priority assignments
  *                             +
  *                             V
  * 80          Kernel mode only
- * 79          High priority
+ * 79          System high priority
  *                             A
  *                             +
  *                     (16 levels)
  *                             +
  *                             V
- * 64          High priority
+ * 64          System high priority
  * 63          Elevated priorities
  *                             A
  *                             +
  *************************************************************************
  */
 
+#define BASEPRI_RTQUEUES       (BASEPRI_REALTIME + 1)                          /* 97 */
 #define BASEPRI_REALTIME       (MAXPRI - (NRQS / 4) + 1)                       /* 96 */
 
-#define MAXPRI_STANDARD                (BASEPRI_REALTIME - 1)                          /* 95 */
-
-#define MAXPRI_KERNBAND                MAXPRI_STANDARD                                         /* 95 */
-#define MINPRI_KERNBAND                (MAXPRI_KERNBAND - (NRQS / 8) + 1)      /* 80 */
-
-#define MAXPRI_HIGHBAND                (MINPRI_KERNBAND - 1)                           /* 79 */
-#define MINPRI_HIGHBAND                (MAXPRI_HIGHBAND - (NRQS / 8) + 1)      /* 64 */
-
-#define MAXPRI_MAINBAND                (MINPRI_HIGHBAND - 1)                           /* 63 */
-#define BASEPRI_DEFAULT                (MAXPRI_MAINBAND - (NRQS / 4))          /* 31 */
-#define MINPRI_MAINBAND                MINPRI                                                          /*  0 */
-
-#define MINPRI_STANDARD                MINPRI_MAINBAND                                         /*  0 */
+#define MAXPRI_KERNEL          (BASEPRI_REALTIME - 1)                          /* 95 */
+#define BASEPRI_PREEMPT_HIGH   (BASEPRI_PREEMPT + 1)                           /* 93 */
+#define BASEPRI_PREEMPT                (MAXPRI_KERNEL - 3)                             /* 92 */
+#define BASEPRI_VM             (BASEPRI_PREEMPT - 1)                           /* 91 */
+
+#define BASEPRI_KERNEL         (MINPRI_KERNEL + 1)                             /* 81 */
+#define MINPRI_KERNEL          (MAXPRI_KERNEL - (NRQS / 8) + 1)                /* 80 */
+
+#define MAXPRI_RESERVED                (MINPRI_KERNEL - 1)                             /* 79 */
+#define BASEPRI_GRAPHICS       (MAXPRI_RESERVED - 3)                           /* 76 */
+#define MINPRI_RESERVED                (MAXPRI_RESERVED - (NRQS / 8) + 1)              /* 64 */
+
+#define MAXPRI_USER            (MINPRI_RESERVED - 1)                           /* 63 */
+#define BASEPRI_CONTROL                (BASEPRI_DEFAULT + 17)                          /* 48 */
+#define BASEPRI_FOREGROUND     (BASEPRI_DEFAULT + 16)                          /* 47 */
+#define BASEPRI_BACKGROUND     (BASEPRI_DEFAULT + 15)                          /* 46 */
+#define BASEPRI_USER_INITIATED (BASEPRI_DEFAULT +  6)                          /* 37 */
+#define BASEPRI_DEFAULT                (MAXPRI_USER - (NRQS / 4))                      /* 31 */
+#define MAXPRI_SUPPRESSED      (BASEPRI_DEFAULT - 3)                           /* 28 */
+#define BASEPRI_UTILITY                (BASEPRI_DEFAULT - 11)                          /* 20 */
+#define MAXPRI_THROTTLE                (MINPRI + 4)                                    /*  4 */
+#define MINPRI_USER            MINPRI                                          /*  0 */
+
+#define DEPRESSPRI             MINPRI                  /* depress priority */
+#define MAXPRI_PROMOTE         (MAXPRI_KERNEL)         /* ceiling for mutex promotion */
+
+/*
+ * Scheduling mode of a thread: the type used for thread->sched_mode and
+ * saved_mode.  TH_MODE_NONE is explicitly 0, so a zero-filled mode field
+ * reads as "unassigned".
+ */
+typedef enum {
+       TH_MODE_NONE = 0,                                       /* unassigned, usually for saved_mode only */
+       TH_MODE_REALTIME,                                       /* time constraints supplied */
+       TH_MODE_FIXED,                                          /* use fixed priorities, no decay */
+       TH_MODE_TIMESHARE,                                      /* use timesharing algorithm */
+} sched_mode_t;
+
+/*
+ * Buckets used for load calculation.  TH_BUCKET_MAX is a count, not a
+ * bucket: it is the array bound of sched_pri_shifts[] and
+ * sched_run_buckets[].
+ */
+typedef enum {
+       TH_BUCKET_RUN = 0,      /* All runnable threads */
+       TH_BUCKET_FIXPRI,       /* Fixed-priority */
+       TH_BUCKET_SHARE_FG,     /* Timeshare thread above BASEPRI_UTILITY */
+       TH_BUCKET_SHARE_UT,     /* Timeshare thread between BASEPRI_UTILITY and MAXPRI_THROTTLE */
+       TH_BUCKET_SHARE_BG,     /* Timeshare thread between MAXPRI_THROTTLE and MINPRI */
+       TH_BUCKET_MAX,          /* Number of buckets (array bound) */
+} sched_bucket_t;
 
 /*
  *     Macro to check for invalid priorities.
  */
 #define invalid_pri(pri) ((pri) < MINPRI || (pri) > MAXPRI)
 
+struct runq_stats {            /* embedded in struct run_queue, rt_queue and grrr_run_queue */
+       uint64_t                                count_sum;              /* NOTE(review): presumably an accumulated queue-depth sum -- confirm */
+       uint64_t                                last_change_timestamp;  /* NOTE(review): presumably time of the last depth change -- confirm */
+};
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO)
+
 struct run_queue {
-       queue_head_t            queues[NRQS];           /* one for each priority */
-       decl_simple_lock_data(,lock)                    /* one lock for all queues */
-       int                                     bitmap[NRQBM];          /* run queue bitmap array */
        int                                     highq;                          /* highest runnable queue */
-       int                                     count;                          /* # of runnable threads */
+       bitmap_t                                bitmap[BITMAP_LEN(NRQS)];       /* run queue bitmap array */
+       int                                     count;                          /* # of threads total */
+       int                                     urgency;                        /* level of preemption urgency */
+       queue_head_t            queues[NRQS];           /* one for each priority */
+
+       struct runq_stats       runq_stats;             /* per-queue statistics record */
 };
 
-typedef struct run_queue       *run_queue_t;
-#define RUN_QUEUE_NULL         ((run_queue_t) 0)
+inline static void
+rq_bitmap_set(bitmap_t *map, u_int n)
+{
+       assert(n < NRQS);       /* n must name one of the NRQS priority levels */
+       bitmap_set(map, n);     /* checked wrapper: the actual update is bitmap_set() */
+}
+
+inline static void
+rq_bitmap_clear(bitmap_t *map, u_int n)
+{
+       assert(n < NRQS);       /* n must name one of the NRQS priority levels */
+       bitmap_clear(map, n);   /* checked wrapper: the actual update is bitmap_clear() */
+}
+
+#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */
+
+struct rt_queue {              /* one queue for all runnable RT threads; no per-priority array */
+       _Atomic int             count;                          /* # of threads total */
+       queue_head_t            queue;                          /* all runnable RT threads */
+#if __SMP__
+       decl_simple_lock_data(,rt_lock)                         /* rt_lock exists only when __SMP__ is nonzero */
+#endif
+       struct runq_stats       runq_stats;                     /* per-queue statistics record */
+};
+typedef struct rt_queue *rt_queue_t;   /* note: rt_queue_t is a pointer to struct rt_queue */
 
-#define csw_needed(thread, processor) (                                                                                \
-       ((thread)->state & TH_SUSP)                                                                             ||              \
-       ((processor)->first_quantum?                                                                                    \
-        ((processor)->runq.highq > (thread)->sched_pri                         ||                      \
-         (processor)->processor_set->runq.highq > (thread)->sched_pri) :               \
-        ((processor)->runq.highq >= (thread)->sched_pri                        ||                      \
-         (processor)->processor_set->runq.highq >= (thread)->sched_pri))       )
+#if defined(CONFIG_SCHED_GRRR_CORE)
 
 /*
- *     Scheduler routines.
+ * We map standard Mach priorities to an abstract scale that more properly
+ * indicates how we want processor time allocated under contention.
  */
+typedef uint8_t        grrr_proportional_priority_t;
+typedef uint8_t grrr_group_index_t;
+
+#define NUM_GRRR_PROPORTIONAL_PRIORITIES       256
+#define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255)
+
+#if 0
+#define NUM_GRRR_GROUPS 8                                      /* log(256) */
+#endif
+
+#define NUM_GRRR_GROUPS 64                                     /* 256/4 */
+
+struct grrr_group {            /* element type of grrr_run_queue.groups[] */
+       queue_chain_t                   priority_order;                         /* next greatest weight group */
+       grrr_proportional_priority_t            minpriority;    /* NOTE(review): presumably lowest proportional priority in this group -- confirm */
+       grrr_group_index_t              index;                  /* NOTE(review): presumably this group's slot in groups[] -- confirm */
+
+       queue_head_t                    clients;
+       int                                             count;
+       uint32_t                                weight;
+#if 0
+       uint32_t                                deferred_removal_weight;        /* compiled out by the enclosing #if 0 */
+#endif
+       uint32_t                                work;
+       thread_t                                current_client;
+};
+
+struct grrr_run_queue {
+       int                                     count;
+       uint32_t                        last_rescale_tick;
+       struct grrr_group       groups[NUM_GRRR_GROUPS];        /* NUM_GRRR_GROUPS (64) group slots */
+       queue_head_t            sorted_group_list;              /* NOTE(review): presumably links groups through grrr_group.priority_order -- confirm */
+       uint32_t                        weight;
+       grrr_group_t            current_group;
+       
+       struct runq_stats   runq_stats;                         /* per-queue statistics record */
+};
 
-/* Remove thread from its run queue */
-extern run_queue_t     rem_runq(
-                                       thread_t        thread);
+#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
 
-/* Mach factor computation (in mach_factor.c) */
-extern void            compute_mach_factor(void);
+extern int rt_runq_count(processor_set_t);
+extern void rt_runq_count_incr(processor_set_t);
+extern void rt_runq_count_decr(processor_set_t);
+
+#if defined(CONFIG_SCHED_MULTIQ)
+sched_group_t   sched_group_create(void);
+void            sched_group_destroy(sched_group_t sched_group);
+#endif /* defined(CONFIG_SCHED_MULTIQ) */
 
-/* Update threads quantum (in priority.c) */
-extern void            thread_quantum_update(
-                                       int                     mycpu,
-                                       thread_t        thread,
-                                       int                     nticks,
-                                       int                     state);
 
-extern int             min_quantum;    /* defines max context switch rate */
 
 /*
- *     Shift structures for holding update shifts.  Actual computation
- *     is  usage = (usage >> shift1) +/- (usage >> abs(shift2))  where the
- *     +/- is determined by the sign of shift 2.
+ *     Scheduler routines.
  */
-struct shift {
-       int     shift1;
-       int     shift2;
-};
 
-typedef        struct shift    *shift_t, shift_data_t;
+/* Handle quantum expiration for an executing thread */
+extern void            thread_quantum_expire(
+                                       timer_call_param_t      processor,
+                                       timer_call_param_t      thread);
+
+/* Context switch check for current processor */
+extern ast_t   csw_check(processor_t           processor,
+                                               ast_t                   check_reason);
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+extern uint32_t        std_quantum, min_std_quantum;
+extern uint32_t        std_quantum_us;
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+extern uint32_t thread_depress_time;
+extern uint32_t default_timeshare_computation;
+extern uint32_t default_timeshare_constraint;
+
+extern uint32_t        max_rt_quantum, min_rt_quantum;
+
+extern int default_preemption_rate;
+extern int default_bg_preemption_rate;
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
 
 /*
- *     Age usage (1 << SCHED_TICK_SHIFT) times per second.
+ *     Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second.
+ *     Aging may be deferred during periods where all processors are idle
+ *     and cumulatively applied during periods of activity.
  */
+#define SCHED_TICK_SHIFT       3
+#define SCHED_TICK_MAX_DELTA   (8)
 
-extern unsigned        sched_tick;
+extern unsigned                sched_tick;
+extern uint32_t                sched_tick_interval;
 
-#define SCHED_TICK_SHIFT       3
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+extern uint64_t                sched_one_second_interval;
+
+/* Periodic computation of various averages */
+extern void            compute_averages(uint64_t);
+
+extern void            compute_averunnable(
+                                       void                    *nrun);
+
+extern void            compute_stack_target(
+                                       void                    *arg);
+
+extern void            compute_memory_pressure(
+                                       void                    *arg);
+
+extern void            compute_pageout_gc_throttle(
+                                       void                    *arg);
 
-#define SCHED_SCALE            128
-#define SCHED_SHIFT            7
+extern void            compute_pmap_gc_throttle(
+                                       void                    *arg);
 
 /*
- *     thread_timer_delta macro takes care of both thread timers.
+ *     Conversion factor from usage
+ *     to priority.
  */
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
 
-#define thread_timer_delta(thread)                                             \
-MACRO_BEGIN                                                                                                    \
-       register unsigned       delta;                                                          \
-                                                                                                                       \
-       delta = 0;                                                                                              \
-       TIMER_DELTA((thread)->system_timer,                                             \
-                                       (thread)->system_timer_save, delta);    \
-       TIMER_DELTA((thread)->user_timer,                                               \
-                                       (thread)->user_timer_save, delta);              \
-       (thread)->cpu_delta += delta;                                                   \
-       (thread)->sched_delta += (delta *                                               \
-                                       (thread)->processor_set->sched_load);   \
-MACRO_END
+#define MAX_LOAD (NRQS - 1)
+extern uint32_t                sched_pri_shifts[TH_BUCKET_MAX];
+extern uint32_t                sched_fixed_shift;
+extern int8_t          sched_load_shifts[NRQS];
+extern uint32_t                sched_decay_usage_age_factor;
+void sched_timeshare_consider_maintenance(uint64_t ctime);
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+void sched_consider_recommended_cores(uint64_t ctime, thread_t thread);
+
+extern int32_t         sched_poll_yield_shift;
+extern uint64_t                sched_safe_duration;
+
+extern uint32_t                sched_load_average, sched_mach_factor;
+
+extern uint32_t                avenrun[3], mach_factor[3];
+
+extern uint64_t                max_unsafe_computation;
+extern uint64_t                max_poll_computation;
+
+extern volatile uint32_t sched_run_buckets[TH_BUCKET_MAX];
+
+extern uint32_t sched_run_incr(thread_t thread);
+extern uint32_t sched_run_decr(thread_t thread);
 
-#if    SIMPLE_CLOCK
 /*
- *     sched_usec is an exponential average of number of microseconds
- *     in a second for clock drift compensation.
+ *     thread_timer_delta macro takes care of both thread timers:
+ *     it stores into delta the system-timer delta plus the user-timer
+ *     delta, each obtained from timer_delta() and cast to typeof(delta).
+ *     Both arguments are expanded more than once, so pass expressions
+ *     without side effects.
  */
-
-extern int     sched_usec;
-#endif /* SIMPLE_CLOCK */
+#define thread_timer_delta(thread, delta)                                      \
+MACRO_BEGIN                                                                                                    \
+       (delta) = (typeof(delta))timer_delta(&(thread)->system_timer,                   \
+                                                       &(thread)->system_timer_save);  \
+       (delta) += (typeof(delta))timer_delta(&(thread)->user_timer,                    \
+                                                       &(thread)->user_timer_save);    \
+MACRO_END
 
 #endif /* _KERN_SCHED_H_ */