osfmk/kern/sched.h

   1 /*
   2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   sched.h
  60  *      Author: Avadis Tevanian, Jr.
  61  *      Date:   1985
  62  *
  63  *      Header file for scheduler.
  64  *
  65  */
  66
  67 #ifndef _KERN_SCHED_H_
  68 #define _KERN_SCHED_H_
  69
  70 #include <mach/policy.h>
  71 #include <kern/kern_types.h>
  72 #include <kern/smp.h>
  73 #include <kern/circle_queue.h>
  74 #include <kern/macro_help.h>
  75 #include <kern/timer_call.h>
  76 #include <kern/ast.h>
  77 #include <kern/bits.h>
  78
  79 #define NRQS_MAX        (128)                           /* maximum number of priority levels */
  80
  81 #define MAXPRI          (NRQS_MAX-1)
  82 #define MINPRI          0                               /* lowest legal priority schedulable */
  83 #define IDLEPRI         MINPRI                          /* idle thread priority */
  84 #define NOPRI           -1
  85
  86 /*
  87  *      High-level priority assignments
  88  *
  89  *************************************************************************
  90  * 127          Reserved (real-time)
  91  *                              A
  92  *                              +
  93  *                      (32 levels)
  94  *                              +
  95  *                              V
  96  * 96           Reserved (real-time)
  97  * 95           Kernel mode only
  98  *                              A
  99  *                              +
 100  *                      (16 levels)
 101  *                              +
 102  *                              V
 103  * 80           Kernel mode only
 104  * 79           System high priority
 105  *                              A
 106  *                              +
 107  *                      (16 levels)
 108  *                              +
 109  *                              V
 110  * 64           System high priority
 111  * 63           Elevated priorities
 112  *                              A
 113  *                              +
 114  *                      (12 levels)
 115  *                              +
 116  *                              V
 117  * 52           Elevated priorities
 118  * 51           Elevated priorities (incl. BSD +nice)
 119  *                              A
 120  *                              +
 121  *                      (20 levels)
 122  *                              +
 123  *                              V
 124  * 32           Elevated priorities (incl. BSD +nice)
 125  * 31           Default (default base for threads)
 126  * 30           Lowered priorities (incl. BSD -nice)
 127  *                              A
 128  *                              +
 129  *                      (20 levels)
 130  *                              +
 131  *                              V
 132  * 11           Lowered priorities (incl. BSD -nice)
 133  * 10           Lowered priorities (aged pri's)
 134  *                              A
 135  *                              +
 136  *                      (11 levels)
 137  *                              +
 138  *                              V
 139  * 0            Lowered priorities (aged pri's / idle)
 140  *************************************************************************
 141  */
 142
 143 #define BASEPRI_RTQUEUES        (BASEPRI_REALTIME + 1)                          /* 97 */
 144 #define BASEPRI_REALTIME        (MAXPRI - (NRQS_MAX / 4) + 1)                   /* 96 */
 145
 146 #define MAXPRI_KERNEL           (BASEPRI_REALTIME - 1)                          /* 95 */
 147 #define BASEPRI_PREEMPT_HIGH    (BASEPRI_PREEMPT + 1)                           /* 93 */
 148 #define BASEPRI_PREEMPT         (MAXPRI_KERNEL - 3)                             /* 92 */
 149 #define BASEPRI_VM              (BASEPRI_PREEMPT - 1)                           /* 91 */
 150
 151 #define BASEPRI_KERNEL          (MINPRI_KERNEL + 1)                             /* 81 */
 152 #define MINPRI_KERNEL           (MAXPRI_KERNEL - (NRQS_MAX / 8) + 1)            /* 80 */
 153
 154 #define MAXPRI_RESERVED         (MINPRI_KERNEL - 1)                             /* 79 */
 155 #define BASEPRI_GRAPHICS        (MAXPRI_RESERVED - 3)                           /* 76 */
 156 #define MINPRI_RESERVED         (MAXPRI_RESERVED - (NRQS_MAX / 8) + 1)          /* 64 */
 157
 158 #define MAXPRI_USER             (MINPRI_RESERVED - 1)                           /* 63 */
 159 #define BASEPRI_CONTROL         (BASEPRI_DEFAULT + 17)                          /* 48 */
 160 #define BASEPRI_FOREGROUND      (BASEPRI_DEFAULT + 16)                          /* 47 */
 161 #define BASEPRI_BACKGROUND      (BASEPRI_DEFAULT + 15)                          /* 46 */
 162 #define BASEPRI_USER_INITIATED  (BASEPRI_DEFAULT +  6)                          /* 37 */
 163 #define BASEPRI_DEFAULT         (MAXPRI_USER - (NRQS_MAX / 4))                  /* 31 */
 164 #define MAXPRI_SUPPRESSED       (BASEPRI_DEFAULT - 3)                           /* 28 */
 165 #define BASEPRI_UTILITY         (BASEPRI_DEFAULT - 11)                          /* 20 */
 166 #define MAXPRI_THROTTLE         (MINPRI + 4)                                    /*  4 */
 167 #define MINPRI_USER             MINPRI                                          /*  0 */
 168
 169 #define DEPRESSPRI              (MINPRI)                /* depress priority */
 170
 171 #define MAXPRI_PROMOTE          (MAXPRI_KERNEL)         /* ceiling for mutex promotion */
 172 #define MINPRI_RWLOCK           (BASEPRI_BACKGROUND)    /* floor when holding rwlock count */
 173 #define MINPRI_EXEC             (BASEPRI_DEFAULT)       /* floor when in exec state */
 174 #define MINPRI_WAITQ            (BASEPRI_DEFAULT)       /* floor when in waitq handover state */
 175
 176 #define NRQS                    (BASEPRI_REALTIME)      /* Non-realtime levels for runqs */
 177
 178 /* Ensure that NRQS is large enough to represent all non-realtime threads; even promoted ones */
 179 _Static_assert((NRQS == (MAXPRI_PROMOTE + 1)), "Runqueues are too small to hold all non-realtime threads");
 180
 181 /* Type used for thread->sched_mode and saved_mode */
 182 typedef enum {
 183         TH_MODE_NONE = 0,                                       /* unassigned, usually for saved_mode only */
 184         TH_MODE_REALTIME,                                       /* time constraints supplied */
 185         TH_MODE_FIXED,                                          /* use fixed priorities, no decay */
 186         TH_MODE_TIMESHARE,                                      /* use timesharing algorithm */
 187 } sched_mode_t;
 188
/*
 * Since the clutch scheduler organizes threads based on the thread group
 * and the scheduling bucket, it's important not to mix threads from multiple
 * priority bands into the same bucket. To achieve that, in the clutch bucket
 * world, there is effectively one scheduling bucket per QoS.
 */
 195
 196 /* Buckets used for load calculation */
 197 typedef enum {
 198         TH_BUCKET_FIXPRI = 0,                   /* Fixed-priority */
 199         TH_BUCKET_SHARE_FG,                     /* Timeshare thread above BASEPRI_DEFAULT */
 200 #if CONFIG_SCHED_CLUTCH
 201         TH_BUCKET_SHARE_IN,                     /* Timeshare thread between BASEPRI_USER_INITIATED and BASEPRI_DEFAULT */
 202 #endif /* CONFIG_SCHED_CLUTCH */
 203         TH_BUCKET_SHARE_DF,                     /* Timeshare thread between BASEPRI_DEFAULT and BASEPRI_UTILITY */
 204         TH_BUCKET_SHARE_UT,                     /* Timeshare thread between BASEPRI_UTILITY and MAXPRI_THROTTLE */
 205         TH_BUCKET_SHARE_BG,                     /* Timeshare thread between MAXPRI_THROTTLE and MINPRI */
 206         TH_BUCKET_RUN,                          /* All runnable threads */
 207         TH_BUCKET_SCHED_MAX = TH_BUCKET_RUN,    /* Maximum schedulable buckets */
 208         TH_BUCKET_MAX,
 209 } sched_bucket_t;
 210
 211 /*
 212  *      Macro to check for invalid priorities.
 213  */
 214 #define invalid_pri(pri) ((pri) < MINPRI || (pri) > MAXPRI)
 215
 216 struct runq_stats {
 217         uint64_t                count_sum;
 218         uint64_t                last_change_timestamp;
 219 };
 220
 221 #if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO)
 222
 223 struct run_queue {
 224         int                     highq;                          /* highest runnable queue */
 225         bitmap_t                bitmap[BITMAP_LEN(NRQS)];       /* run queue bitmap array */
 226         int                     count;                          /* # of threads total */
 227         int                     urgency;                        /* level of preemption urgency */
 228         circle_queue_head_t     queues[NRQS];           /* one for each priority */
 229
 230         struct runq_stats       runq_stats;
 231 };
 232
 233 inline static void
 234 rq_bitmap_set(bitmap_t *map, u_int n)
 235 {
 236         assert(n < NRQS);
 237         bitmap_set(map, n);
 238 }
 239
 240 inline static void
 241 rq_bitmap_clear(bitmap_t *map, u_int n)
 242 {
 243         assert(n < NRQS);
 244         bitmap_clear(map, n);
 245 }
 246
 247 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */
 248
 249 struct rt_queue {
 250         _Atomic int             count;                          /* # of threads total */
 251         queue_head_t            queue;                          /* all runnable RT threads */
 252         struct runq_stats       runq_stats;
 253 };
 254 typedef struct rt_queue *rt_queue_t;
 255
 256 #if defined(CONFIG_SCHED_GRRR_CORE)
 257
 258 /*
 259  * We map standard Mach priorities to an abstract scale that more properly
 260  * indicates how we want processor time allocated under contention.
 261  */
 262 typedef uint8_t grrr_proportional_priority_t;
 263 typedef uint8_t grrr_group_index_t;
 264
 265 #define NUM_GRRR_PROPORTIONAL_PRIORITIES        256
 266 #define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255)
 267
 268 #if 0
 269 #define NUM_GRRR_GROUPS 8                                       /* log(256) */
 270 #endif
 271
 272 #define NUM_GRRR_GROUPS 64                                      /* 256/4 */
 273
 274 struct grrr_group {
 275         queue_chain_t                   priority_order;                         /* next greatest weight group */
 276         grrr_proportional_priority_t            minpriority;
 277         grrr_group_index_t              index;
 278
 279         queue_head_t                    clients;
 280         int                                             count;
 281         uint32_t                                weight;
 282 #if 0
 283         uint32_t                                deferred_removal_weight;
 284 #endif
 285         uint32_t                                work;
 286         thread_t                                current_client;
 287 };
 288
 289 struct grrr_run_queue {
 290         int                                     count;
 291         uint32_t                        last_rescale_tick;
 292         struct grrr_group       groups[NUM_GRRR_GROUPS];
 293         queue_head_t            sorted_group_list;
 294         uint32_t                        weight;
 295         grrr_group_t            current_group;
 296
 297         struct runq_stats   runq_stats;
 298 };
 299
 300 #endif /* defined(CONFIG_SCHED_GRRR_CORE) */
 301
 302 extern int rt_runq_count(processor_set_t);
 303 extern void rt_runq_count_incr(processor_set_t);
 304 extern void rt_runq_count_decr(processor_set_t);
 305
 306 #if defined(CONFIG_SCHED_MULTIQ)
 307 sched_group_t   sched_group_create(void);
 308 void            sched_group_destroy(sched_group_t sched_group);
 309 #endif /* defined(CONFIG_SCHED_MULTIQ) */
 310
 311
 312
 313 /*
 314  *      Scheduler routines.
 315  */
 316
 317 /* Handle quantum expiration for an executing thread */
 318 extern void             thread_quantum_expire(
 319         timer_call_param_t      processor,
 320         timer_call_param_t      thread);
 321
 322 /* Context switch check for current processor */
 323 extern ast_t    csw_check(
 324         thread_t      thread,
 325         processor_t   processor,
 326         ast_t         check_reason);
 327
 328 /* Check for pending ASTs */
 329 extern void ast_check(processor_t processor);
 330
 331 extern void sched_update_generation_count(void);
 332
 333 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 334 extern uint32_t std_quantum, min_std_quantum;
 335 extern uint32_t std_quantum_us;
 336 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 337
 338 extern uint32_t thread_depress_time;
 339 extern uint32_t default_timeshare_computation;
 340 extern uint32_t default_timeshare_constraint;
 341
 342 extern uint32_t max_rt_quantum, min_rt_quantum;
 343
 344 extern int default_preemption_rate;
 345
 346 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 347
 348 /*
 349  *      Age usage  at approximately (1 << SCHED_TICK_SHIFT) times per second
 350  *      Aging may be deferred during periods where all processors are idle
 351  *      and cumulatively applied during periods of activity.
 352  */
 353 #define SCHED_TICK_SHIFT        3
 354 #define SCHED_TICK_MAX_DELTA    (8)
 355
 356 extern unsigned         sched_tick;
 357 extern uint32_t         sched_tick_interval;
 358
 359 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 360
 361 extern uint64_t         sched_one_second_interval;
 362
 363 /* Periodic computation of various averages */
 364 extern void            compute_sched_load(void);
 365
 366 extern void             compute_averages(uint64_t);
 367
 368 extern void             compute_averunnable(
 369         void                    *nrun);
 370
 371 extern void             compute_stack_target(
 372         void                    *arg);
 373
 374 extern void             compute_pageout_gc_throttle(
 375         void                    *arg);
 376
 377 extern void             compute_pmap_gc_throttle(
 378         void                    *arg);
 379
 380 /*
 381  *      Conversion factor from usage
 382  *      to priority.
 383  */
 384 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 385
 386 #define MAX_LOAD (NRQS - 1)
 387 #define SCHED_PRI_SHIFT_MAX ((8 * sizeof(uint32_t)) - 1)
 388 extern uint32_t         sched_pri_shifts[TH_BUCKET_MAX];
 389 extern uint32_t         sched_fixed_shift;
 390 extern int8_t           sched_load_shifts[NRQS];
 391 extern uint32_t         sched_decay_usage_age_factor;
 392 void sched_timeshare_consider_maintenance(uint64_t ctime);
 393 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 394
 395 void sched_consider_recommended_cores(uint64_t ctime, thread_t thread);
 396
 397 extern int32_t          sched_poll_yield_shift;
 398 extern uint64_t         sched_safe_duration;
 399
 400 extern uint32_t         sched_load_average, sched_mach_factor;
 401
 402 extern uint32_t         avenrun[3], mach_factor[3];
 403
 404 extern uint64_t         max_unsafe_computation;
 405 extern uint64_t         max_poll_computation;
 406
 407 extern uint32_t         sched_run_buckets[TH_BUCKET_MAX];
 408
 409 extern uint32_t sched_run_incr(thread_t thread);
 410 extern uint32_t sched_run_decr(thread_t thread);
 411 extern void sched_update_thread_bucket(thread_t thread);
 412
 413 extern uint32_t sched_smt_run_incr(thread_t thread);
 414 extern uint32_t sched_smt_run_decr(thread_t thread);
 415 extern void sched_smt_update_thread_bucket(thread_t thread);
 416
 417 #define SCHED_DECAY_TICKS       32
 418 struct shift_data {
 419         int     shift1;
 420         int     shift2;
 421 };
 422
 423 /*
 424  *      thread_timer_delta macro takes care of both thread timers.
 425  */
 426 #define thread_timer_delta(thread, delta)                                       \
 427 MACRO_BEGIN                                                                     \
 428         (delta) = (typeof(delta))timer_delta(&(thread)->system_timer,           \
 429             &(thread)->system_timer_save);                                      \
 430         (delta) += (typeof(delta))timer_delta(&(thread)->user_timer,            \
 431             &(thread)->user_timer_save);                                        \
 432 MACRO_END
 433
 434 extern bool system_is_SMT;
 435
 436 #endif  /* _KERN_SCHED_H_ */