xnu-4570.41.2.tar.gz

[apple/xnu.git] / osfmk / kern / sched.h
diff --git a/osfmk/kern/sched.h b/osfmk/kern/sched.h

index 347dc2f0fdca1f789337317580bdac1c54cd05b4..d8fe8ee3669112144c1ed97582f6cf754d6a2924 100644 (file)
--- a/osfmk/kern/sched.h
+++ b/osfmk/kern/sched.h
@@ -1,14 +1,19 @@
  /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
   *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
   * This file contains Original Code and/or Modifications of Original Code
   * as defined in and that are subject to the Apple Public Source License
   * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
   * 
   * The Original Code and all software distributed under the License are
   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -18,7 +23,7 @@
   * Please see the License for the specific language governing rights and
   * limitations under the License.
   * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   */
  /*
   * @OSF_COPYRIGHT@
@@ -62,23 +67,22 @@
  #ifndef        _KERN_SCHED_H_
  #define _KERN_SCHED_H_
  
-#include <stat_time.h>
-
  #include <mach/policy.h>
  #include <kern/kern_types.h>
+#include <kern/smp.h>
  #include <kern/queue.h>
-#include <kern/lock.h>
  #include <kern/macro_help.h>
  #include <kern/timer_call.h>
  #include <kern/ast.h>
+#include <kern/kalloc.h>
+#include <kern/bits.h>
  
  #define        NRQS            128                             /* 128 levels per run queue */
-#define NRQBM          (NRQS / 32)             /* number of words per bit map */
  
  #define MAXPRI         (NRQS-1)
-#define MINPRI         IDLEPRI                 /* lowest legal priority schedulable */
-#define        IDLEPRI         0                               /* idle thread priority */
-#define DEPRESSPRI     MINPRI                  /* depress priority */
+#define MINPRI         0                               /* lowest legal priority schedulable */
+#define        IDLEPRI         MINPRI                          /* idle thread priority */
+#define        NOPRI           -1
  
  /*
   *     High-level priority assignments
@@ -141,96 +145,200 @@
  #define BASEPRI_REALTIME       (MAXPRI - (NRQS / 4) + 1)                       /* 96 */
  
  #define MAXPRI_KERNEL          (BASEPRI_REALTIME - 1)                          /* 95 */
-#define BASEPRI_PREEMPT                (MAXPRI_KERNEL - 2)                                     /* 93 */
-#define BASEPRI_KERNEL         (MINPRI_KERNEL + 1)                                     /* 81 */
-#define MINPRI_KERNEL          (MAXPRI_KERNEL - (NRQS / 8) + 1)        /* 80 */
+#define BASEPRI_PREEMPT_HIGH   (BASEPRI_PREEMPT + 1)                           /* 93 */
+#define BASEPRI_PREEMPT                (MAXPRI_KERNEL - 3)                             /* 92 */
+#define BASEPRI_VM             (BASEPRI_PREEMPT - 1)                           /* 91 */
+
+#define BASEPRI_KERNEL         (MINPRI_KERNEL + 1)                             /* 81 */
+#define MINPRI_KERNEL          (MAXPRI_KERNEL - (NRQS / 8) + 1)                /* 80 */
  
-#define MAXPRI_RESERVED                (MINPRI_KERNEL - 1)                                     /* 79 */
-#define MINPRI_RESERVED                (MAXPRI_RESERVED - (NRQS / 8) + 1)      /* 64 */
+#define MAXPRI_RESERVED                (MINPRI_KERNEL - 1)                             /* 79 */
+#define BASEPRI_GRAPHICS       (MAXPRI_RESERVED - 3)                           /* 76 */
+#define MINPRI_RESERVED                (MAXPRI_RESERVED - (NRQS / 8) + 1)              /* 64 */
  
-#define MAXPRI_USER                    (MINPRI_RESERVED - 1)                           /* 63 */
+#define MAXPRI_USER            (MINPRI_RESERVED - 1)                           /* 63 */
  #define BASEPRI_CONTROL                (BASEPRI_DEFAULT + 17)                          /* 48 */
  #define BASEPRI_FOREGROUND     (BASEPRI_DEFAULT + 16)                          /* 47 */
  #define BASEPRI_BACKGROUND     (BASEPRI_DEFAULT + 15)                          /* 46 */
+#define BASEPRI_USER_INITIATED (BASEPRI_DEFAULT +  6)                          /* 37 */
  #define BASEPRI_DEFAULT                (MAXPRI_USER - (NRQS / 4))                      /* 31 */
-#define MINPRI_USER                    MINPRI                                                          /*  0 */
+#define MAXPRI_SUPPRESSED      (BASEPRI_DEFAULT - 3)                           /* 28 */
+#define BASEPRI_UTILITY                (BASEPRI_DEFAULT - 11)                          /* 20 */
+#define MAXPRI_THROTTLE                (MINPRI + 4)                                    /*  4 */
+#define MINPRI_USER            MINPRI                                          /*  0 */
+
+#define DEPRESSPRI             MINPRI                  /* depress priority */
+#define MAXPRI_PROMOTE         (MAXPRI_KERNEL)         /* ceiling for mutex promotion */
+
+/* Type used for thread->sched_mode and saved_mode */
+typedef enum {
+       TH_MODE_NONE = 0,                                       /* unassigned, usually for saved_mode only */
+       TH_MODE_REALTIME,                                       /* time constraints supplied */
+       TH_MODE_FIXED,                                          /* use fixed priorities, no decay */
+       TH_MODE_TIMESHARE,                                      /* use timesharing algorithm */
+} sched_mode_t;
+
+/* Buckets used for load calculation */
+typedef enum {
+       TH_BUCKET_RUN = 0,      /* All runnable threads */
+       TH_BUCKET_FIXPRI,       /* Fixed-priority */
+       TH_BUCKET_SHARE_FG,     /* Timeshare thread above BASEPRI_UTILITY */
+       TH_BUCKET_SHARE_UT,     /* Timeshare thread between BASEPRI_UTILITY and MAXPRI_THROTTLE */
+       TH_BUCKET_SHARE_BG,     /* Timeshare thread between MAXPRI_THROTTLE and MINPRI */
+       TH_BUCKET_MAX,          /* Bucket count, not a bucket; sizes per-bucket arrays such as sched_run_buckets[] */
+} sched_bucket_t;
  
  /*
   *     Macro to check for invalid priorities.
   */
  #define invalid_pri(pri) ((pri) < MINPRI || (pri) > MAXPRI)
  
+struct runq_stats {
+       uint64_t                                count_sum;
+       uint64_t                                last_change_timestamp;
+};
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO)
+
  struct run_queue {
         int                                     highq;                          /* highest runnable queue */
-       int                                     bitmap[NRQBM];          /* run queue bitmap array */
+       bitmap_t                                bitmap[BITMAP_LEN(NRQS)];       /* run queue bitmap array */
         int                                     count;                          /* # of threads total */
         int                                     urgency;                        /* level of preemption urgency */
         queue_head_t            queues[NRQS];           /* one for each priority */
+
+       struct runq_stats       runq_stats;
  };
  
-typedef struct run_queue       *run_queue_t;
-#define RUN_QUEUE_NULL         ((run_queue_t) 0)
+inline static void     /* Bounds-asserting wrapper around bitmap_set() for run queue bitmaps */
+rq_bitmap_set(bitmap_t *map, u_int n)
+{
+       assert(n < NRQS);       /* n must be a valid run queue level index (0 .. NRQS-1) */
+       bitmap_set(map, n);
+}
+
+inline static void     /* Bounds-asserting wrapper around bitmap_clear() for run queue bitmaps */
+rq_bitmap_clear(bitmap_t *map, u_int n)
+{
+       assert(n < NRQS);       /* n must be a valid run queue level index (0 .. NRQS-1) */
+       bitmap_clear(map, n);
+}
+
+#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */
+
+struct rt_queue {
+       _Atomic int             count;                          /* # of threads total */
+       queue_head_t            queue;                          /* all runnable RT threads */
+#if __SMP__
+       decl_simple_lock_data(,rt_lock)                         /* rt_lock: only declared when __SMP__ is set */
+#endif
+       struct runq_stats       runq_stats;                     /* statistics record; see struct runq_stats */
+};
+typedef struct rt_queue *rt_queue_t;                           /* pointer alias for struct rt_queue */
  
-#define first_timeslice(processor)             ((processor)->timeslice > 0)
+#if defined(CONFIG_SCHED_GRRR_CORE)
  
-#define        processor_timeslice_setup(processor, thread)                    \
-MACRO_BEGIN                                                                                                            \
-       (processor)->timeslice =                                                                        \
-               ((thread)->sched_mode & TH_MODE_TIMESHARE)?                             \
-                       (processor)->processor_set->timeshare_quanta: 1;        \
-MACRO_END
+/*
+ * We map standard Mach priorities to an abstract scale that more properly
+ * indicates how we want processor time allocated under contention.
+ */
+typedef uint8_t        grrr_proportional_priority_t;
+typedef uint8_t grrr_group_index_t;
+
+#define NUM_GRRR_PROPORTIONAL_PRIORITIES       256
+#define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255)
+
+#if 0
+#define NUM_GRRR_GROUPS 8                                      /* log2(256) */
+#endif
+
+#define NUM_GRRR_GROUPS 64                                     /* 256/4 */
+
+struct grrr_group {
+       queue_chain_t                   priority_order;                         /* next greatest weight group */
+       grrr_proportional_priority_t            minpriority;
+       grrr_group_index_t              index;
+
+       queue_head_t                    clients;
+       int                                             count;
+       uint32_t                                weight;
+#if 0
+       uint32_t                                deferred_removal_weight;
+#endif
+       uint32_t                                work;
+       thread_t                                current_client;
+};
+
+struct grrr_run_queue {
+       int                                     count;
+       uint32_t                        last_rescale_tick;
+       struct grrr_group       groups[NUM_GRRR_GROUPS];
+       queue_head_t            sorted_group_list;
+       uint32_t                        weight;
+       grrr_group_t            current_group;
+       
+       struct runq_stats   runq_stats;
+};
+
+#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
+
+extern int rt_runq_count(processor_set_t);
+extern void rt_runq_count_incr(processor_set_t);
+extern void rt_runq_count_decr(processor_set_t);
+
+#if defined(CONFIG_SCHED_MULTIQ)
+sched_group_t   sched_group_create(void);
+void            sched_group_destroy(sched_group_t sched_group);
+#endif /* defined(CONFIG_SCHED_MULTIQ) */
  
-#define thread_quantum_init(thread)                                                    \
-MACRO_BEGIN                                                                                                    \
-       (thread)->current_quantum =                                                     \
-               ((thread)->sched_mode & TH_MODE_REALTIME)?                      \
-                       (thread)->realtime.computation: std_quantum;    \
-MACRO_END
  
-/* Invoked at splsched by a thread on itself */
-#define csw_needed(thread, processor) (                                                                                \
-       ((thread)->state & TH_SUSP)                                                                             ||              \
-       (first_timeslice(processor)?                                                                                    \
-        ((processor)->runq.highq > (thread)->sched_pri                         ||                      \
-         (processor)->processor_set->runq.highq > (thread)->sched_pri) :               \
-        ((processor)->runq.highq >= (thread)->sched_pri                        ||                      \
-         (processor)->processor_set->runq.highq >= (thread)->sched_pri))       )
  
  /*
   *     Scheduler routines.
   */
  
-/* Remove thread from its run queue */
-extern run_queue_t     run_queue_remove(
-                                               thread_t        thread);
-
  /* Handle quantum expiration for an executing thread */
  extern void            thread_quantum_expire(
                                         timer_call_param_t      processor,
                                         timer_call_param_t      thread);
  
-/* Called at splsched by a thread on itself */
-extern ast_t   csw_check(
-                                       thread_t                thread,
-                                       processor_t             processor);
+/* Context switch check for current processor */
+extern ast_t   csw_check(processor_t           processor,
+                                               ast_t                   check_reason);
  
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
  extern uint32_t        std_quantum, min_std_quantum;
  extern uint32_t        std_quantum_us;
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+extern uint32_t thread_depress_time;
+extern uint32_t default_timeshare_computation;
+extern uint32_t default_timeshare_constraint;
  
  extern uint32_t        max_rt_quantum, min_rt_quantum;
  
-extern uint32_t        sched_cswtime;
+extern int default_preemption_rate;
+extern int default_bg_preemption_rate;
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
  
  /*
- *     Age usage (1 << SCHED_TICK_SHIFT) times per second.
+ *     Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second.
+ *     Aging may be deferred during periods where all processors are idle
+ *     and cumulatively applied during periods of activity.
   */
  #define SCHED_TICK_SHIFT       3
+#define SCHED_TICK_MAX_DELTA   (8)
  
  extern unsigned                sched_tick;
  extern uint32_t                sched_tick_interval;
  
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+extern uint64_t                sched_one_second_interval;
+
  /* Periodic computation of various averages */
-extern void            compute_averages(void);
+extern void            compute_averages(uint64_t);
  
  extern void            compute_averunnable(
                                         void                    *nrun);
@@ -238,34 +346,54 @@ extern void               compute_averunnable(
  extern void            compute_stack_target(
                                         void                    *arg);
  
+extern void            compute_memory_pressure(
+                                       void                    *arg);
+
+extern void            compute_pageout_gc_throttle(
+                                       void                    *arg);
+
+extern void            compute_pmap_gc_throttle(
+                                       void                    *arg);
+
  /*
   *     Conversion factor from usage
   *     to priority.
   */
-extern uint32_t                sched_pri_shift;
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
  
-/*
- *     Scaling factor for usage
- *     based on load.
- */
+#define MAX_LOAD (NRQS - 1)
+extern uint32_t                sched_pri_shifts[TH_BUCKET_MAX];
+extern uint32_t                sched_fixed_shift;
  extern int8_t          sched_load_shifts[NRQS];
+extern uint32_t                sched_decay_usage_age_factor;
+void sched_timeshare_consider_maintenance(uint64_t ctime);
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
+void sched_consider_recommended_cores(uint64_t ctime, thread_t thread);
  
  extern int32_t         sched_poll_yield_shift;
-extern uint32_t                sched_safe_duration;
+extern uint64_t                sched_safe_duration;
+
+extern uint32_t                sched_load_average, sched_mach_factor;
+
+extern uint32_t                avenrun[3], mach_factor[3];
  
  extern uint64_t                max_unsafe_computation;
  extern uint64_t                max_poll_computation;
  
-extern uint32_t                avenrun[3], mach_factor[3];
+extern volatile uint32_t sched_run_buckets[TH_BUCKET_MAX];
+
+extern uint32_t sched_run_incr(thread_t thread);
+extern uint32_t sched_run_decr(thread_t thread);
  
  /*
   *     thread_timer_delta macro takes care of both thread timers.
   */
  #define thread_timer_delta(thread, delta)                                      \
  MACRO_BEGIN                                                                                                    \
-       (delta) = timer_delta(&(thread)->system_timer,                  \
+       (delta) = (typeof(delta))timer_delta(&(thread)->system_timer,                   \
                                                         &(thread)->system_timer_save);  \
-       (delta) += timer_delta(&(thread)->user_timer,                   \
+       (delta) += (typeof(delta))timer_delta(&(thread)->user_timer,                    \
                                                         &(thread)->user_timer_save);    \
  MACRO_END