osfmk/kern/sched_clutch.h

   1 /*
   2  * Copyright (c) 2018 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #ifndef _KERN_SCHED_CLUTCH_H_
  30 #define _KERN_SCHED_CLUTCH_H_
  31
  32 #include <kern/sched.h>
  33 #include <machine/atomic.h>
  34 #include <kern/priority_queue.h>
  35 #include <kern/thread_group.h>
  36 #include <kern/bits.h>
  37
  38 #if CONFIG_SCHED_CLUTCH
  39
  40 /*
  41  * Clutch ordering based on thread group flags (specified
  42  * by the thread grouping mechanism). These properties
  43  * define a thread group specific priority boost.
  44  *
  45  * The current implementation gives a slight boost to
  46  * HIGH & MED thread groups which effectively deprioritizes
  47  * daemon thread groups which are marked "Efficient" on AMP
  48  * systems.
  49  */
  50 __enum_decl(sched_clutch_tg_priority_t, uint8_t, {
  51         SCHED_CLUTCH_TG_PRI_LOW           = 0,
  52         SCHED_CLUTCH_TG_PRI_MED           = 1,
  53         SCHED_CLUTCH_TG_PRI_HIGH          = 2,
  54         SCHED_CLUTCH_TG_PRI_MAX           = 3,
  55 });
  56
  57 /*
  58  * For the current implementation, bound threads are not managed
  59  * in the clutch hierarchy. This helper macro is used to indicate
  60  * if the thread should be in the hierarchy.
  61  */
  62 #define SCHED_CLUTCH_THREAD_ELIGIBLE(thread)    ((thread->bound_processor) == PROCESSOR_NULL)
  63
  64 #if CONFIG_SCHED_EDGE
  65 #define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread)       ((thread->sched_flags & (TH_SFLAG_ECORE_ONLY | TH_SFLAG_PCORE_ONLY)) != 0)
  66 #define SCHED_CLUTCH_THREAD_CLUSTER_BOUND_SOFT(thread)  ((thread->sched_flags & TH_SFLAG_BOUND_SOFT) != 0)
  67
  68 #else /* CONFIG_SCHED_EDGE */
  69 #define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread)       (0)
  70 #define SCHED_CLUTCH_THREAD_CLUSTER_BOUND_SOFT(thread)  (0)
  71 #endif /* CONFIG_SCHED_EDGE */
  72
  73 /*
  74  * Clutch Bucket Runqueue Structure.
  75  */
  76 struct sched_clutch_bucket_runq {
  77         int                     scbrq_highq;
  78         int                     scbrq_count;
  79         bitmap_t                scbrq_bitmap[BITMAP_LEN(NRQS_MAX)];
  80         circle_queue_head_t     scbrq_queues[NRQS_MAX];
  81 };
  82 typedef struct sched_clutch_bucket_runq *sched_clutch_bucket_runq_t;
  83
  84 /*
  85  *
  86  * Clutch hierarchy locking protocol
  87  *
  88  * The scheduler clutch hierarchy is protected by a combination of
  89  * atomics and pset lock.
  90  * - All fields protected by the pset lock are annotated with (P)
  91  * - All fields updated using atomics are annotated with (A)
  92  * - All fields that are unprotected and are not updated after
  93  *   initialization are annotated with (I)
  94  */
  95
  96 /*
  97  * struct sched_clutch_root_bucket
  98  *
  99  * A clutch_root_bucket represents all threads across all thread groups
 100  * that are in the same scheduler bucket (FG/IN/...). The clutch_root_bucket
 101  * is selected for execution by the root level bucket selection algorithm
 102  * which bases the decision on the clutch_root_bucket's deadline (EDF). The
 103  * deadline for a root bucket is calculated based on its runnable timestamp
 104  * and the worst-case-execution-latency values specied in sched_clutch_root_bucket_wcel[]
 105  */
 106 struct sched_clutch_root_bucket {
 107         /* (I) sched bucket represented by this root bucket */
 108         uint8_t                         scrb_bucket;
 109         /* (I) Indicates the root bucket represents cluster bound threads */
 110         bool                            scrb_bound;
 111         /* (P) Indicates if the root bucket is in starvation avoidance mode */
 112         bool                            scrb_starvation_avoidance;
 113
 114         union {
 115                 /* (P) priority queue for all unbound clutch buckets in this sched bucket */
 116                 struct sched_clutch_bucket_runq scrb_clutch_buckets;
 117                 /* (P) Runqueue for all bound threads part of this root bucket */
 118                 struct run_queue                scrb_bound_thread_runq;
 119         };
 120         /* (P) priority queue entry to use for enqueueing root bucket into root prioq */
 121         struct priority_queue_entry_deadline scrb_pqlink;
 122         /* (P) warped deadline for root bucket */
 123         uint64_t                        scrb_warped_deadline;
 124         /* (P) warp remaining for root bucket */
 125         uint64_t                        scrb_warp_remaining;
 126         /* (P) timestamp for the start of the starvation avoidance window */
 127         uint64_t                        scrb_starvation_ts;
 128 };
 129 typedef struct sched_clutch_root_bucket *sched_clutch_root_bucket_t;
 130
 131 /*
 132  * struct sched_clutch_root
 133  *
 134  * A clutch_root represents the root of the hierarchy. It maintains a
 135  * priority queue of all runnable root buckets. The clutch_root also
 136  * maintains the information about the last clutch_root_bucket scheduled
 137  * in order to implement bucket level quantum. The bucket level quantums
 138  * allow low priority buckets to get a "fair" chance of using the CPU even
 139  * if they contain a bunch of short executing threads. The bucket quantums
 140  * are configured using sched_clutch_root_bucket_quantum[]
 141  */
 142 struct sched_clutch_root {
 143         /* (P) root level priority; represents the highest runnable thread in the hierarchy */
 144         int16_t                         scr_priority;
 145         /* (P) total number of runnable threads in the hierarchy */
 146         uint16_t                        scr_thr_count;
 147         /* (P) root level urgency; represents the urgency of the whole hierarchy for pre-emption purposes */
 148         int16_t                         scr_urgency;
 149
 150         uint32_t                        scr_cluster_id;
 151         /* (I) processor set this hierarchy belongs to */
 152         processor_set_t                 scr_pset;
 153         /*
 154          * (P) list of all runnable clutch buckets across the system;
 155          * allows easy iteration in the sched tick based timesharing code
 156          */
 157         queue_head_t                    scr_clutch_buckets;
 158
 159         /*
 160          * (P) priority queue of all runnable foreign buckets in this hierarchy;
 161          * used for tracking thread groups which need to be migrated when
 162          * psets are available or rebalancing threads on CPU idle.
 163          */
 164         struct priority_queue_sched_max scr_foreign_buckets;
 165
 166         /* Root level bucket management */
 167
 168         /* (P) bitmap of all runnable unbounded root buckets */
 169         bitmap_t                        scr_unbound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
 170         /* (P) bitmap of all runnable unbounded root buckets which have warps remaining */
 171         bitmap_t                        scr_unbound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
 172         /* (P) bitmap of all runnable bounded root buckets */
 173         bitmap_t                        scr_bound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
 174         /* (P) bitmap of all runnable bounded root buckets which have warps remaining */
 175         bitmap_t                        scr_bound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
 176
 177         /* (P) priority queue of all runnable unbounded root buckets in deadline order */
 178         struct priority_queue_deadline_min scr_unbound_root_buckets;
 179         /* (P) priority queue of all bounded root buckets in deadline order */
 180         struct priority_queue_deadline_min scr_bound_root_buckets;
 181
 182         /* (P) cumulative run counts at each bucket for load average calculation */
 183         uint16_t _Atomic                scr_cumulative_run_count[TH_BUCKET_SCHED_MAX];
 184
 185         /* (P) storage for all unbound clutch_root_buckets */
 186         struct sched_clutch_root_bucket scr_unbound_buckets[TH_BUCKET_SCHED_MAX];
 187         /* (P) storage for all bound clutch_root_buckets */
 188         struct sched_clutch_root_bucket scr_bound_buckets[TH_BUCKET_SCHED_MAX];
 189 };
 190 typedef struct sched_clutch_root *sched_clutch_root_t;
 191
 192 /* forward declaration for sched_clutch */
 193 struct sched_clutch;
 194
 195 /*
 196  * sched_clutch_bucket_cpu_data_t
 197  *
 198  * Used for maintaining clutch bucket used and blocked time. The
 199  * values are used for calculating the interactivity score for the
 200  * clutch bucket.
 201  *
 202  * Since the CPU used/blocked calculation uses wide atomics, the data
 203  * types used are different based on the platform.
 204  */
 205
 206 #if __LP64__
 207
 208 #define CLUTCH_CPU_DATA_MAX             (UINT64_MAX)
 209 typedef uint64_t                        clutch_cpu_data_t;
 210 typedef unsigned __int128               clutch_cpu_data_wide_t;
 211
 212 #else /* __LP64__ */
 213
 214 #define CLUTCH_CPU_DATA_MAX             (UINT32_MAX)
 215 typedef uint32_t                        clutch_cpu_data_t;
 216 typedef uint64_t                        clutch_cpu_data_wide_t;
 217
 218 #endif /* __LP64__ */
 219
 220 typedef union sched_clutch_bucket_cpu_data {
 221         struct {
 222                 /* Clutch bucket CPU used across all threads */
 223                 clutch_cpu_data_t       scbcd_cpu_used;
 224                 /* Clutch bucket voluntary blocked time */
 225                 clutch_cpu_data_t       scbcd_cpu_blocked;
 226         } cpu_data;
 227         clutch_cpu_data_wide_t          scbcd_cpu_data_packed;
 228 } sched_clutch_bucket_cpu_data_t;
 229
 230 /*
 231  * struct sched_clutch_bucket
 232  *
 233  * A sched_clutch_bucket represents the set of threads for a thread
 234  * group at a particular scheduling bucket in a specific cluster.
 235  * It maintains information about the CPU usage & blocking behavior
 236  * of all threads part of the clutch_bucket. It inherits the timeshare
 237  * values from the clutch_bucket_group for decay and timesharing among
 238  * threads in the clutch.
 239  *
 240  * Since the clutch bucket is a per thread group per-QoS entity it is
 241  * important to keep its size small and the structure well aligned.
 242  */
 243 struct sched_clutch_bucket {
 244 #if CONFIG_SCHED_EDGE
 245         /* (P) flag to indicate if the bucket is a foreign bucket */
 246         bool                            scb_foreign;
 247 #endif /* CONFIG_SCHED_EDGE */
 248         /* (I) bucket for the clutch_bucket */
 249         uint8_t                         scb_bucket;
 250         /* (P) priority of the clutch bucket */
 251         uint8_t                         scb_priority;
 252         /* (P) number of threads in this clutch_bucket; should match runq.count */
 253         uint16_t                        scb_thr_count;
 254
 255         /* Pointer to the clutch bucket group this clutch bucket belongs to */
 256         struct sched_clutch_bucket_group *scb_group;
 257         /* (A) pointer to the root of the hierarchy this bucket is in */
 258         struct sched_clutch_root        *scb_root;
 259         /* (P) priority queue of threads based on their promoted/base priority */
 260         struct priority_queue_sched_max scb_clutchpri_prioq;
 261         /* (P) runq of threads in clutch_bucket */
 262         struct priority_queue_sched_stable_max scb_thread_runq;
 263
 264         /* (P) linkage for all clutch_buckets in a root bucket; used for tick operations */
 265         queue_chain_t                   scb_listlink;
 266         /* (P) linkage for clutch_bucket in root_bucket runqueue */
 267         queue_chain_t                   scb_runqlink;
 268         /* (P) queue of threads for timesharing purposes */
 269         queue_head_t                    scb_thread_timeshare_queue;
 270 #if CONFIG_SCHED_EDGE
 271         /* (P) linkage for all "foreign" clutch buckets in the root clutch */
 272         struct priority_queue_entry_sched     scb_foreignlink;
 273 #endif /* CONFIG_SCHED_EDGE */
 274 };
 275 typedef struct sched_clutch_bucket *sched_clutch_bucket_t;
 276
 277 /*
 278  * sched_clutch_counter_time_t
 279  *
 280  * Holds thread counts and a timestamp (typically for a clutch bucket group).
 281  * Used to allow atomic updates to these fields.
 282  */
 283 typedef union sched_clutch_counter_time {
 284         struct {
 285                 uint64_t                scct_count;
 286                 uint64_t                scct_timestamp;
 287         };
 288 #if __LP64__
 289         unsigned __int128               scct_packed;
 290 #endif /* __LP64__ */
 291 } __attribute__((aligned(16))) sched_clutch_counter_time_t;
 292
 293 /*
 294  * struct sched_clutch_bucket_group
 295  *
 296  * It represents all the threads for a thread group at a particular
 297  * QoS/Scheduling bucket. This structure also maintains the timesharing
 298  * properties that are used for decay calculation for all threads in the
 299  * thread group at the specific scheduling bucket.
 300  */
 301 struct sched_clutch_bucket_group {
 302         /* (I) bucket for the clutch_bucket_group */
 303         uint8_t                         scbg_bucket;
 304         /* (A) sched tick when the clutch bucket group load/shifts were updated */
 305         uint32_t _Atomic                scbg_timeshare_tick;
 306         /* (A) priority shifts for threads in the clutch_bucket_group */
 307         uint32_t _Atomic                scbg_pri_shift;
 308         /* (A) preferred cluster ID for clutch bucket */
 309         uint32_t _Atomic                scbg_preferred_cluster;
 310         /* (I) clutch to which this clutch bucket_group belongs */
 311         struct sched_clutch             *scbg_clutch;
 312 #if !__LP64__
 313         /* Lock for synchronizing updates to blocked data (only on platforms without 128-atomics) */
 314         lck_spin_t                      scbg_stats_lock;
 315 #endif /* !__LP64__ */
 316         /* (A/L depending on arch) holds blcked timestamp and runnable/running count */
 317         sched_clutch_counter_time_t     scbg_blocked_data;
 318         /* (P/A depending on scheduler) holds pending timestamp and thread count */
 319         sched_clutch_counter_time_t     scbg_pending_data;
 320         /* (P/A depending on scheduler) holds interactivity timestamp and score */
 321         sched_clutch_counter_time_t     scbg_interactivity_data;
 322         /* (A) CPU usage information for the clutch bucket group */
 323         sched_clutch_bucket_cpu_data_t  scbg_cpu_data;
 324
 325         /*
 326          * Edge Scheduler Optimization
 327          *
 328          * Currently the array is statically sized based on MAX_PSETS.
 329          * If that definition does not exist (or has a large theoretical
 330          * max value), this could be a dynamic array based on ml_topology_info*
 331          * routines.
 332          *
 333          * <Edge Multi-cluster Support Needed>
 334          */
 335         /* Storage for all clutch buckets for a thread group at scbg_bucket */
 336         struct sched_clutch_bucket      scbg_clutch_buckets[MAX_PSETS];
 337 };
 338 typedef struct sched_clutch_bucket_group *sched_clutch_bucket_group_t;
 339
 340
 341 /*
 342  * struct sched_clutch
 343  *
 344  * A sched_clutch is a 1:1 mapping to a thread group. It maintains the
 345  * storage for all clutch buckets for this thread group and some properties
 346  * of the thread group (such as flags etc.)
 347  */
 348 struct sched_clutch {
 349         /*
 350          * (A) number of runnable threads in sched_clutch; needs to be atomic
 351          * to support cross cluster sched_clutch migrations.
 352          */
 353         uint16_t _Atomic                sc_thr_count;
 354         /*
 355          * Grouping specific parameters. Currently the implementation only
 356          * supports thread_group based grouping.
 357          */
 358         union {
 359                 /* (A) priority specified by the thread grouping mechanism */
 360                 sched_clutch_tg_priority_t _Atomic sc_tg_priority;
 361         };
 362         union {
 363                 /* (I) Pointer to thread group */
 364                 struct thread_group     *sc_tg;
 365         };
 366         /* (I) storage for all clutch_buckets for this clutch */
 367         struct sched_clutch_bucket_group sc_clutch_groups[TH_BUCKET_SCHED_MAX];
 368 };
 369 typedef struct sched_clutch *sched_clutch_t;
 370
 371
 372 /* Clutch lifecycle management */
 373 void sched_clutch_init_with_thread_group(sched_clutch_t, struct thread_group *);
 374 void sched_clutch_destroy(sched_clutch_t);
 375
 376 /* Clutch thread membership management */
 377 void sched_clutch_thread_clutch_update(thread_t, sched_clutch_t, sched_clutch_t);
 378 uint32_t sched_edge_thread_preferred_cluster(thread_t);
 379
 380 /* Clutch timesharing stats management */
 381 uint32_t sched_clutch_thread_run_bucket_incr(thread_t, sched_bucket_t);
 382 uint32_t sched_clutch_thread_run_bucket_decr(thread_t, sched_bucket_t);
 383 void sched_clutch_cpu_usage_update(thread_t, uint64_t);
 384 uint32_t sched_clutch_thread_pri_shift(thread_t, sched_bucket_t);
 385
 386 /* Clutch properties accessors */
 387 uint32_t sched_clutch_root_count(sched_clutch_root_t);
 388
 389 /* Grouping specific external routines */
 390 extern sched_clutch_t sched_clutch_for_thread(thread_t);
 391 extern sched_clutch_t sched_clutch_for_thread_group(struct thread_group *);
 392
 393 #if CONFIG_SCHED_EDGE
 394
 395 /*
 396  * Getter and Setter for Edge configuration. Used by CLPC to affect thread migration behavior.
 397  */
 398 void sched_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order);
 399 void sched_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order);
 400 void sched_edge_tg_preferred_cluster_change(struct thread_group *tg, uint32_t *tg_bucket_preferred_cluster, sched_perfcontrol_preferred_cluster_options_t options);
 401
 402 uint16_t sched_edge_cluster_cumulative_count(sched_clutch_root_t root_clutch, sched_bucket_t bucket);
 403
 404 #if DEVELOPMENT || DEBUG
 405 /*
 406  * Sysctl support for dynamically configuring edge properties.
 407  *
 408  * <Edge Multi-cluster Support Needed>
 409  */
 410 kern_return_t sched_edge_sysctl_configure_e_to_p(uint64_t);
 411 kern_return_t sched_edge_sysctl_configure_p_to_e(uint64_t);
 412 sched_clutch_edge sched_edge_e_to_p(void);
 413 sched_clutch_edge sched_edge_p_to_e(void);
 414 #endif /* DEVELOPMENT || DEBUG */
 415
 416 #endif /* CONFIG_SCHED_EDGE */
 417
 418 #endif /* CONFIG_SCHED_CLUTCH */
 419
 420 #endif /* _KERN_SCHED_CLUTCH_H_ */