/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_clutch.h>
#include <machine/atomic.h>
#include <sys/kdebug.h>

#if __AMP__
#include <kern/sched_amp_common.h>
#endif /* __AMP__ */

#if CONFIG_SCHED_CLUTCH

/* Forward declarations of static routines */

/* Root level hierarchy management */
static void sched_clutch_root_init(sched_clutch_root_t, processor_set_t);
static void sched_clutch_root_bucket_init(sched_clutch_root_bucket_t, sched_bucket_t);
static void sched_clutch_root_pri_update(sched_clutch_root_t);
static sched_clutch_root_bucket_t sched_clutch_root_highest_root_bucket(sched_clutch_root_t, uint64_t);
static void sched_clutch_root_urgency_inc(sched_clutch_root_t, thread_t);
static void sched_clutch_root_urgency_dec(sched_clutch_root_t, thread_t);

/* Root bucket level hierarchy management */
static uint64_t sched_clutch_root_bucket_deadline_calculate(sched_clutch_root_bucket_t, uint64_t);
static void sched_clutch_root_bucket_deadline_update(sched_clutch_root_bucket_t, sched_clutch_root_t, uint64_t);
static int sched_clutch_root_bucket_pri_compare(sched_clutch_root_bucket_t, sched_clutch_root_bucket_t);

/* Options for clutch bucket ordering in the runq */
__options_decl(sched_clutch_bucket_options_t, uint32_t, {
	SCHED_CLUTCH_BUCKET_OPTIONS_NONE       = 0x0,
	/* Round robin clutch bucket on thread removal */
	SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR = 0x1,
	/* Insert clutch bucket at head (for thread preemption) */
	SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ      = 0x2,
	/* Insert clutch bucket at tail (default) */
	SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ      = 0x4,
});

/* Clutch bucket level hierarchy management */
static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t);
static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t);
static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);

static void sched_clutch_bucket_cpu_usage_update(sched_clutch_bucket_t, uint64_t);
static void sched_clutch_bucket_cpu_blocked_update(sched_clutch_bucket_t, uint64_t);
static uint8_t sched_clutch_bucket_pri_calculate(sched_clutch_bucket_t, uint64_t);
static sched_clutch_bucket_t sched_clutch_root_bucket_highest_clutch_bucket(sched_clutch_root_bucket_t);

/* Clutch timeshare properties updates */
static uint32_t sched_clutch_run_bucket_incr(sched_clutch_t, sched_bucket_t);
static uint32_t sched_clutch_run_bucket_decr(sched_clutch_t, sched_bucket_t);
static void sched_clutch_bucket_cpu_adjust(sched_clutch_bucket_t);
static void sched_clutch_bucket_timeshare_update(sched_clutch_bucket_t);
static boolean_t sched_thread_sched_pri_promoted(thread_t);

/* Clutch membership management */
static boolean_t sched_clutch_thread_insert(sched_clutch_root_t, thread_t, integer_t);
static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t, sched_clutch_bucket_options_t);
static thread_t sched_clutch_thread_highest(sched_clutch_root_t);

/* Clutch properties updates */
static uint32_t sched_clutch_root_urgency(sched_clutch_root_t);
static uint32_t sched_clutch_root_count_sum(sched_clutch_root_t);
static int sched_clutch_root_priority(sched_clutch_root_t);

/* System based routines */
static bool sched_clutch_pset_available(processor_set_t);

/* Helper debugging routines */
static inline void sched_clutch_hierarchy_locked_assert(sched_clutch_root_t);

/*
 * Global priority queue comparator routine for root buckets. The
 * routine implements the priority queue as a minimum deadline queue
 * to achieve EDF scheduling.
 */
priority_queue_compare_fn_t sched_clutch_root_bucket_compare;

/*
 * Special markers for buckets that have invalid WCELs/quantums etc.
 */
#define SCHED_CLUTCH_INVALID_TIME_32 ((uint32_t)~0)
#define SCHED_CLUTCH_INVALID_TIME_64 ((uint64_t)~0)

/*
 * Root level bucket WCELs
 *
 * The root level bucket selection algorithm is an Earliest Deadline
 * First (EDF) algorithm where the deadlines for the buckets are defined
 * by the worst-case-execution-latency and the make-runnable timestamp
 * for the bucket.
 */
static uint32_t sched_clutch_root_bucket_wcel_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,   /* FIXPRI */
	0,                              /* FG */
	37500,                          /* IN (37.5ms) */
	75000,                          /* DF (75ms) */
	150000,                         /* UT (150ms) */
	250000                          /* BG (250ms) */
};
static uint64_t sched_clutch_root_bucket_wcel[TH_BUCKET_SCHED_MAX] = {0};
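
/*
 * Worked example of the WCEL based deadlines (using the values above): if a
 * DF and a BG root bucket both become runnable at time T, their deadlines are
 * set to roughly T + 75ms and T + 250ms respectively, so EDF selection picks
 * the DF bucket first even though both buckets are runnable.
 */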

/*
 * Root level bucket warp
 *
 * Each root level bucket has a warp value associated with it as well.
 * The warp value allows the root bucket to effectively warp ahead of
 * lower priority buckets for a limited time even if it has a later
 * deadline. The warping behavior provides extra (but limited)
 * opportunity for high priority buckets to remain responsive.
 */

/* Special warp deadline value to indicate that the bucket has not used any warp yet */
#define SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED    (SCHED_CLUTCH_INVALID_TIME_64)

/* Warp window durations for various tiers */
static uint32_t sched_clutch_root_bucket_warp_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,   /* FIXPRI */
	8000,                           /* FG (8ms) */
	4000,                           /* IN (4ms) */
	2000,                           /* DF (2ms) */
	1000,                           /* UT (1ms) */
	0                               /* BG (0ms) */
};
static uint64_t sched_clutch_root_bucket_warp[TH_BUCKET_SCHED_MAX] = {0};
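
/*
 * Illustration of warp: if a lower bucket (e.g. BG) holds the earliest
 * deadline but a higher bucket (e.g. FG) is runnable with warp remaining,
 * the higher bucket is selected ahead of the EDF order and a warp window of
 * scrb_warp_remaining abstime is opened; once the window expires, selection
 * falls back to deadline order until that bucket's warp is replenished
 * (which happens when it is later chosen as the EDF bucket).
 */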

/*
 * Thread level quantum
 *
 * The algorithm defines quantums for threads at various buckets. This
 * (combined with the root level bucket quantums) restricts how much
 * the lower priority levels can preempt the higher priority threads.
 */
static uint32_t sched_clutch_thread_quantum_us[TH_BUCKET_SCHED_MAX] = {
	10000,                          /* FIXPRI (10ms) */
	10000,                          /* FG (10ms) */
	8000,                           /* IN (8ms) */
	6000,                           /* DF (6ms) */
	4000,                           /* UT (4ms) */
	2000                            /* BG (2ms) */
};
static uint64_t sched_clutch_thread_quantum[TH_BUCKET_SCHED_MAX] = {0};

enum sched_clutch_state {
	SCHED_CLUTCH_STATE_EMPTY = 0,
	SCHED_CLUTCH_STATE_RUNNABLE,
};

/*
 * sched_clutch_us_to_abstime()
 *
 * Initializer for converting all durations in usec to abstime
 */
static void
sched_clutch_us_to_abstime(uint32_t *us_vals, uint64_t *abstime_vals)
{
	for (int i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		if (us_vals[i] == SCHED_CLUTCH_INVALID_TIME_32) {
			abstime_vals[i] = SCHED_CLUTCH_INVALID_TIME_64;
		} else {
			clock_interval_to_absolutetime_interval(us_vals[i],
			    NSEC_PER_USEC, &abstime_vals[i]);
		}
	}
}
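
/*
 * Usage sketch: the scheduler init path is expected to convert the tables
 * above once, along the lines of
 *
 *	sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel);
 *	sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp);
 *	sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum);
 */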

#if DEVELOPMENT || DEBUG

/*
 * sched_clutch_hierarchy_locked_assert()
 *
 * Debugging helper routine. Asserts that the hierarchy is locked. The locking
 * for the hierarchy depends on where the hierarchy is hooked. The current
 * implementation hooks the hierarchy at the pset, so the hierarchy is locked
 * using the pset lock.
 */
static inline void
sched_clutch_hierarchy_locked_assert(
	sched_clutch_root_t root_clutch)
{
	pset_assert_locked(root_clutch->scr_pset);
}

#else /* DEVELOPMENT || DEBUG */

static inline void
sched_clutch_hierarchy_locked_assert(
	__unused sched_clutch_root_t root_clutch)
{
}

#endif /* DEVELOPMENT || DEBUG */

/*
 * sched_clutch_thr_count_inc()
 *
 * Increment thread count at a hierarchy level with overflow checks.
 */
static void
sched_clutch_thr_count_inc(
	uint16_t *thr_count)
{
	if (__improbable(os_inc_overflow(thr_count))) {
		panic("sched_clutch thread count overflowed!");
	}
}

/*
 * sched_clutch_thr_count_dec()
 *
 * Decrement thread count at a hierarchy level with underflow checks.
 */
static void
sched_clutch_thr_count_dec(
	uint16_t *thr_count)
{
	if (__improbable(os_dec_overflow(thr_count))) {
		panic("sched_clutch thread count underflowed!");
	}
}

/*
 * sched_clutch_pset_available()
 *
 * Routine to determine if a pset is available for scheduling.
 */
static bool
sched_clutch_pset_available(processor_set_t pset)
{
	/* Check if the cluster has none of the CPUs available */
	if (pset->online_processor_count == 0) {
		return false;
	}

	/* Check if the cluster is not recommended by CLPC */
	if (!pset_is_recommended(pset)) {
		return false;
	}

	return true;
}

/*
 * sched_clutch_root_init()
 *
 * Routine to initialize the scheduler hierarchy root.
 */
static void
sched_clutch_root_init(
	sched_clutch_root_t root_clutch,
	processor_set_t pset)
{
	root_clutch->scr_thr_count = 0;
	root_clutch->scr_priority = NOPRI;
	root_clutch->scr_urgency = 0;
	root_clutch->scr_pset = pset;

	/* Initialize the queue which maintains all runnable clutch_buckets for timesharing purposes */
	queue_init(&root_clutch->scr_clutch_buckets);

	/* Initialize the queue which maintains all runnable foreign clutch buckets */
	queue_init(&root_clutch->scr_foreign_buckets);

	/* Initialize the bitmap and priority queue of runnable root buckets */
	sched_clutch_root_bucket_compare = priority_heap_make_comparator(a, b, struct sched_clutch_root_bucket, scrb_pqlink, {
		return (a->scrb_deadline < b->scrb_deadline) ? 1 : ((a->scrb_deadline == b->scrb_deadline) ? 0 : -1);
	});
	priority_queue_init(&root_clutch->scr_root_buckets, PRIORITY_QUEUE_GENERIC_KEY | PRIORITY_QUEUE_MIN_HEAP);
	bitmap_zero(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX);
	bitmap_zero(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);

	/* Initialize all the root buckets */
	for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		sched_clutch_root_bucket_init(&root_clutch->scr_buckets[i], i);
	}
}

/*
 * Clutch Bucket Runqueues
 *
 * The clutch buckets are maintained in a runq at the root bucket level. The
 * runq organization allows clutch buckets to be ordered based on various
 * factors such as:
 *
 * - Clutch buckets are round robin'ed at the same priority level when a
 *   thread is selected from a clutch bucket. This prevents a clutch bucket
 *   from starving out other clutch buckets at the same priority.
 *
 * - Clutch buckets are inserted at the head when they become runnable due to
 *   thread preemption. This allows threads that were preempted to maintain
 *   their order in the queue.
 */

/*
 * sched_clutch_bucket_runq_init()
 *
 * Initialize a clutch bucket runq.
 */
static void
sched_clutch_bucket_runq_init(
	sched_clutch_bucket_runq_t clutch_buckets_rq)
{
	clutch_buckets_rq->scbrq_highq = NOPRI;
	for (uint8_t i = 0; i < BITMAP_LEN(NRQS); i++) {
		clutch_buckets_rq->scbrq_bitmap[i] = 0;
	}
	clutch_buckets_rq->scbrq_count = 0;
	for (int i = 0; i < NRQS; i++) {
		circle_queue_init(&clutch_buckets_rq->scbrq_queues[i]);
	}
}

/*
 * sched_clutch_bucket_runq_empty()
 *
 * Returns if a clutch bucket runq is empty.
 */
static boolean_t
sched_clutch_bucket_runq_empty(
	sched_clutch_bucket_runq_t clutch_buckets_rq)
{
	return clutch_buckets_rq->scbrq_count == 0;
}

/*
 * sched_clutch_bucket_runq_peek()
 *
 * Returns the highest priority clutch bucket in the runq.
 */
static sched_clutch_bucket_t
sched_clutch_bucket_runq_peek(
	sched_clutch_bucket_runq_t clutch_buckets_rq)
{
	if (clutch_buckets_rq->scbrq_count > 0) {
		circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_buckets_rq->scbrq_highq];
		return cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink);
	} else {
		return NULL;
	}
}

/*
 * sched_clutch_bucket_runq_enqueue()
 *
 * Enqueue a clutch bucket into the runq based on the options passed in.
 */
static void
sched_clutch_bucket_runq_enqueue(
	sched_clutch_bucket_runq_t clutch_buckets_rq,
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_bucket_options_t options)
{
	circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
	if (circle_queue_empty(queue)) {
		circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink);
		bitmap_set(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority);
		if (clutch_bucket->scb_priority > clutch_buckets_rq->scbrq_highq) {
			clutch_buckets_rq->scbrq_highq = clutch_bucket->scb_priority;
		}
	} else {
		if (options & SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ) {
			circle_enqueue_head(queue, &clutch_bucket->scb_runqlink);
		} else {
			/*
			 * Default behavior (handles SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ &
			 * SCHED_CLUTCH_BUCKET_OPTIONS_NONE)
			 */
			circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink);
		}
	}
	clutch_buckets_rq->scbrq_count++;
}

/*
 * sched_clutch_bucket_runq_remove()
 *
 * Remove a clutch bucket from the runq.
 */
static void
sched_clutch_bucket_runq_remove(
	sched_clutch_bucket_runq_t clutch_buckets_rq,
	sched_clutch_bucket_t clutch_bucket)
{
	circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
	circle_dequeue(queue, &clutch_bucket->scb_runqlink);
	assert(clutch_buckets_rq->scbrq_count > 0);
	clutch_buckets_rq->scbrq_count--;
	if (circle_queue_empty(queue)) {
		bitmap_clear(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority);
		clutch_buckets_rq->scbrq_highq = bitmap_first(clutch_buckets_rq->scbrq_bitmap, NRQS);
	}
}

static void
sched_clutch_bucket_runq_rotate(
	sched_clutch_bucket_runq_t clutch_buckets_rq,
	sched_clutch_bucket_t clutch_bucket)
{
	circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
	assert(clutch_bucket == cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink));
	circle_queue_rotate_head_forward(queue);
}

/*
 * sched_clutch_root_bucket_init()
 *
 * Routine to initialize root buckets.
 */
static void
sched_clutch_root_bucket_init(
	sched_clutch_root_bucket_t root_bucket,
	sched_bucket_t bucket)
{
	root_bucket->scrb_bucket = bucket;
	sched_clutch_bucket_runq_init(&root_bucket->scrb_clutch_buckets);
	priority_queue_entry_init(&root_bucket->scrb_pqlink);
	root_bucket->scrb_deadline = SCHED_CLUTCH_INVALID_TIME_64;
	root_bucket->scrb_warped_deadline = 0;
	root_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[root_bucket->scrb_bucket];
}

/*
 * sched_clutch_root_bucket_pri_compare()
 *
 * Routine to compare root buckets based on the highest runnable clutch
 * bucket priorities in the root buckets.
 */
static int
sched_clutch_root_bucket_pri_compare(
	sched_clutch_root_bucket_t a,
	sched_clutch_root_bucket_t b)
{
	sched_clutch_bucket_t a_highest = sched_clutch_root_bucket_highest_clutch_bucket(a);
	sched_clutch_bucket_t b_highest = sched_clutch_root_bucket_highest_clutch_bucket(b);
	return (a_highest->scb_priority > b_highest->scb_priority) ?
	       1 : ((a_highest->scb_priority == b_highest->scb_priority) ? 0 : -1);
}

/*
 * sched_clutch_root_select_aboveui()
 *
 * Special case scheduling for Above UI bucket.
 *
 * AboveUI threads are typically system critical threads that need low latency
 * which is why they are handled specially.
 *
 * Since the priority range for AboveUI and FG Timeshare buckets overlap, it is
 * important to maintain some native priority order between those buckets. The policy
 * implemented here is to compare the highest clutch buckets of both buckets; if the
 * Above UI bucket is higher, schedule it immediately. Otherwise fall through to the
 * deadline based scheduling which should pick up the timeshare buckets.
 *
 * The implementation allows extremely low latency CPU access for Above UI threads
 * while supporting the use case of high priority timeshare threads contending with
 * lower priority fixed priority threads.
 */
static boolean_t
sched_clutch_root_select_aboveui(
	sched_clutch_root_t root_clutch)
{
	if (bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_FIXPRI)) {
		sched_clutch_root_bucket_t root_bucket_aboveui = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
		sched_clutch_root_bucket_t root_bucket_sharefg = &root_clutch->scr_buckets[TH_BUCKET_SHARE_FG];

		if (!bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_SHARE_FG)) {
			/* If the timeshare FG bucket is not runnable, pick the aboveUI bucket for scheduling */
			return true;
		}
		if (sched_clutch_root_bucket_pri_compare(root_bucket_aboveui, root_bucket_sharefg) >= 0) {
			/* If the aboveUI bucket has a higher native clutch bucket priority, schedule it */
			return true;
		}
	}
	return false;
}
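
/*
 * For example, if the AboveUI clutch bucket priority works out to 60 while the
 * highest FG timeshare clutch bucket is at 55, the comparison above returns a
 * non-negative value and the FIXPRI root bucket is scheduled immediately; if
 * the FG clutch bucket were higher, selection would fall through to the EDF
 * based logic in sched_clutch_root_highest_root_bucket().
 */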

/*
 * sched_clutch_root_highest_root_bucket()
 *
 * Main routine to find the highest runnable root level bucket.
 * This routine is called from performance sensitive contexts; so it is
 * crucial to keep this O(1).
 */
static sched_clutch_root_bucket_t
sched_clutch_root_highest_root_bucket(
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		return NULL;
	}

	if (sched_clutch_root_select_aboveui(root_clutch)) {
		return &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	}

	/*
	 * Above UI bucket is not runnable or has a low priority clutch bucket; use the earliest deadline model
	 * to schedule threads. The idea is that as the timeshare buckets use CPU, they will drop their
	 * interactivity score and allow low priority AboveUI clutch buckets to be scheduled.
	 */

	/* Find the earliest deadline bucket */
	sched_clutch_root_bucket_t edf_bucket = priority_queue_min(&root_clutch->scr_root_buckets, struct sched_clutch_root_bucket, scrb_pqlink);

	sched_clutch_root_bucket_t warp_bucket = NULL;
	int warp_bucket_index = -1;
evaluate_warp_buckets:
	/* Check if any higher runnable buckets have warp available */
	warp_bucket_index = bitmap_lsb_first(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);

	if ((warp_bucket_index == -1) || (warp_bucket_index >= edf_bucket->scrb_bucket)) {
		/* No higher buckets have warp available; choose the edf bucket and replenish its warp */
		sched_clutch_root_bucket_deadline_update(edf_bucket, root_clutch, timestamp);
		edf_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[edf_bucket->scrb_bucket];
		edf_bucket->scrb_warped_deadline = SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED;
		bitmap_set(root_clutch->scr_warp_available, edf_bucket->scrb_bucket);
		return edf_bucket;
	}

	/*
	 * Looks like there is a root bucket which is higher in the natural priority
	 * order than edf_bucket and might have some warp remaining.
	 */
	warp_bucket = &root_clutch->scr_buckets[warp_bucket_index];
	if (warp_bucket->scrb_warped_deadline == SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/* Root bucket has not used any of its warp; set a deadline to expire its warp and return it */
		warp_bucket->scrb_warped_deadline = timestamp + warp_bucket->scrb_warp_remaining;
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}
	if (warp_bucket->scrb_warped_deadline > timestamp) {
		/* Root bucket already has a warp window open with some warp remaining */
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}

	/*
	 * For this bucket, the warp window was opened sometime in the past but has now
	 * expired. Mark the bucket as not available for warp anymore and re-run the
	 * warp bucket selection logic.
	 */
	warp_bucket->scrb_warp_remaining = 0;
	bitmap_clear(root_clutch->scr_warp_available, warp_bucket->scrb_bucket);
	goto evaluate_warp_buckets;
}
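
/*
 * Example walk-through of the selection above: assume BG holds the earliest
 * deadline but FG is runnable with unused warp. The warp scan finds FG (a
 * higher bucket than BG), opens a warp window of scrb_warp_remaining and
 * returns FG. Once that window expires, FG's warp is exhausted, its bit in
 * scr_warp_available is cleared and the loop re-evaluates, eventually falling
 * back to the EDF choice (BG), which also replenishes BG's warp.
 */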

/*
 * sched_clutch_root_bucket_deadline_calculate()
 *
 * Calculate the deadline for the bucket based on its WCEL
 */
static uint64_t
sched_clutch_root_bucket_deadline_calculate(
	sched_clutch_root_bucket_t root_bucket,
	uint64_t timestamp)
{
	/* For the fixpri AboveUI bucket always return it as the earliest deadline */
	if (root_bucket->scrb_bucket < TH_BUCKET_SHARE_FG) {
		return 0;
	}

	/* For all timeshare buckets set the deadline as current time + worst-case-execution-latency */
	return timestamp + sched_clutch_root_bucket_wcel[root_bucket->scrb_bucket];
}

/*
 * sched_clutch_root_bucket_deadline_update()
 *
 * Routine to update the deadline of the root bucket when it is selected.
 * Updating the deadline also moves the root_bucket in the EDF priority
 * queue.
 */
static void
sched_clutch_root_bucket_deadline_update(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* The algorithm never uses the deadlines for scheduling the TH_BUCKET_FIXPRI bucket */
		return;
	}
	uint64_t old_deadline = root_bucket->scrb_deadline;
	uint64_t new_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	assert(old_deadline <= new_deadline);
	if (old_deadline != new_deadline) {
		root_bucket->scrb_deadline = new_deadline;
		/* Since the priority queue is a min-heap, use the decrease routine even though the deadline has a larger value now */
		priority_queue_entry_decrease(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);
	}
}

/*
 * sched_clutch_root_bucket_runnable()
 *
 * Routine to insert a newly runnable root bucket into the hierarchy.
 * Also updates the deadline and warp parameters as necessary.
 */
static void
sched_clutch_root_bucket_runnable(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	/* Mark the root bucket as runnable */
	bitmap_set(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, 0, 0, 0);

	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */
		return;
	}

	root_bucket->scrb_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	priority_queue_insert(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);

	if (root_bucket->scrb_warp_remaining) {
		/* Since the bucket has some warp remaining and it is now runnable, mark it as available for warp */
		bitmap_set(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	}
}

/*
 * sched_clutch_root_bucket_empty()
 *
 * Routine to remove an empty root bucket from the hierarchy.
 * Also updates the deadline and warp parameters as necessary.
 */
static void
sched_clutch_root_bucket_empty(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	bitmap_clear(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0, 0);

	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */
		return;
	}

	priority_queue_remove(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, sched_clutch_root_bucket_compare);

	bitmap_clear(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	if (root_bucket->scrb_warped_deadline > timestamp) {
		/*
		 * For root buckets that were using the warp, check if the warp
		 * deadline is in the future. If yes, remove the wall time the
		 * warp was active and update the warp remaining. This allows
		 * the root bucket to use the remaining warp the next time it
		 * becomes runnable.
		 */
		root_bucket->scrb_warp_remaining = root_bucket->scrb_warped_deadline - timestamp;
	} else if (root_bucket->scrb_warped_deadline != SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/*
		 * If the root bucket's warped deadline is in the past, it has used up
		 * all the warp it was assigned. Empty out its warp remaining.
		 */
		root_bucket->scrb_warp_remaining = 0;
	}
}

/*
 * sched_clutch_root_pri_update()
 *
 * The root level priority is used for thread selection and preemption
 * logic.
 */
static void
sched_clutch_root_pri_update(
	sched_clutch_root_t root_clutch)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		/* No runnable root buckets */
		root_clutch->scr_priority = NOPRI;
		assert(root_clutch->scr_urgency == 0);
		return;
	}
	sched_clutch_root_bucket_t root_bucket = NULL;
	/* Special case for AboveUI (uses same logic as thread selection) */
	if (sched_clutch_root_select_aboveui(root_clutch)) {
		root_bucket = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	} else {
		/*
		 * AboveUI bucket is not runnable or has a low clutch bucket priority,
		 * select the next runnable root bucket in natural priority order. This logic
		 * is slightly different from thread selection, because thread selection
		 * considers deadlines, warps etc. to decide the most optimal bucket at a
		 * given timestamp. Since the priority value is used for preemption decisions
		 * only, it needs to be based on the highest runnable thread available in
		 * the timeshare domain.
		 */
		int root_bucket_index = bitmap_lsb_next(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX, TH_BUCKET_FIXPRI);
		assert(root_bucket_index != -1);
		root_bucket = &root_clutch->scr_buckets[root_bucket_index];
	}
	/* For the selected root bucket, find the highest priority clutch bucket */
	sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket);
	root_clutch->scr_priority = priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);
}

/*
 * sched_clutch_root_urgency_inc()
 *
 * Routine to increment the urgency at the root level based on the thread
 * priority that is being inserted into the hierarchy. The root urgency
 * counter is updated based on the urgency of threads in any of the
 * clutch buckets which are part of the hierarchy.
 *
 * Always called with the pset lock held.
 */
static void
sched_clutch_root_urgency_inc(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
		root_clutch->scr_urgency++;
	}
}

/*
 * sched_clutch_root_urgency_dec()
 *
 * Routine to decrement the urgency at the root level based on the thread
 * priority that is being removed from the hierarchy. The root urgency
 * counter is updated based on the urgency of threads in any of the
 * clutch buckets which are part of the hierarchy.
 *
 * Always called with the pset lock held.
 */
static void
sched_clutch_root_urgency_dec(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
		root_clutch->scr_urgency--;
	}
}

/*
 * Clutch bucket level scheduling
 *
 * The second level of scheduling is the clutch bucket level scheduling
 * which tries to schedule thread groups within root_buckets. Each
 * clutch represents a thread group and a clutch_bucket represents
 * threads at a particular sched_bucket within that thread group. The
 * goal of this level of scheduling is to allow interactive thread
 * groups low latency access to the CPU. It also provides slight
 * scheduling preference for App and unrestricted thread groups.
 *
 * The clutch bucket scheduling algorithm measures an interactivity
 * score for all clutch buckets. The interactivity score is based
 * on the ratio of the CPU used and the voluntary blocking of threads
 * within the clutch bucket. The algorithm is very close to the ULE
 * scheduler on FreeBSD in terms of calculations. The interactivity
 * score provides an interactivity boost in the range of
 * [0:SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI * 2] which allows interactive
 * thread groups to win over CPU spinners.
 */
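
/*
 * With the default SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI of 8 (defined below),
 * the interactivity boost therefore falls in [0:16]: a clutch bucket that
 * blocks much more than it runs scores close to 16, a pure CPU spinner scores
 * close to 0, and a newly created clutch bucket starts at the maximum so that
 * new thread groups compete fairly with already running ones.
 */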

/* Priority boost range for interactivity */
#define SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT     (8)
uint8_t sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT;

/* Window to scale the cpu usage and blocked values (currently 500ms). It is the threshold of used+blocked */
uint64_t sched_clutch_bucket_adjust_threshold = 0;
#define SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS      (500000)

/* The ratio to scale the cpu/blocked time per window */
#define SCHED_CLUTCH_BUCKET_ADJUST_RATIO                (10)

/* Rate at which the interactivity score is recalculated. This keeps the score smooth even for extremely bursty behavior */
uint64_t sched_clutch_bucket_interactivity_delta = 0;
#define SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT (25000)

/*
 * In order to allow App thread groups some preference over daemon thread
 * groups, the App clutch_buckets get an 8 point boost. The boost value should
 * be chosen such that badly behaved apps are still penalized over well
 * behaved interactive daemon clutch_buckets.
 */
#define SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT           (8)
uint8_t sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT;

/* Initial value for voluntary blocking time for the clutch_bucket */
#define SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID  (uint32_t)(~0)

/*
 * sched_clutch_bucket_init()
 *
 * Initializer for clutch buckets.
 */
static void
sched_clutch_bucket_init(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	bzero(clutch_bucket, sizeof(struct sched_clutch_bucket));

	clutch_bucket->scb_bucket = bucket;
	/* scb_priority will be recalculated when a thread is inserted in the clutch bucket */
	clutch_bucket->scb_priority = 0;
	/*
	 * All thread groups should be initialized to be interactive; this allows the newly launched
	 * thread groups to fairly compete with already running thread groups.
	 */
	clutch_bucket->scb_interactivity_score = (sched_clutch_bucket_interactive_pri * 2);
	clutch_bucket->scb_foreign = false;

	os_atomic_store(&clutch_bucket->scb_timeshare_tick, 0, relaxed);
	os_atomic_store(&clutch_bucket->scb_pri_shift, INT8_MAX, relaxed);

	clutch_bucket->scb_interactivity_ts = 0;
	clutch_bucket->scb_blocked_ts = SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID;
	clutch_bucket->scb_clutch = clutch;
	clutch_bucket->scb_root = NULL;
	priority_queue_init(&clutch_bucket->scb_clutchpri_prioq, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
	run_queue_init(&clutch_bucket->scb_runq);
}

/*
 * sched_clutch_init_with_thread_group()
 *
 * Initialize the sched_clutch when the thread group is being created
 */
void
sched_clutch_init_with_thread_group(
	sched_clutch_t clutch,
	struct thread_group *tg)
{
	os_atomic_store(&clutch->sc_thr_count, 0, relaxed);

	/* Initialize all the clutch buckets */
	for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		sched_clutch_bucket_init(&(clutch->sc_clutch_buckets[i]), clutch, i);
	}

	/* Grouping specific fields */
	clutch->sc_tg = tg;
	os_atomic_store(&clutch->sc_tg_priority, 0, relaxed);
}

/*
 * sched_clutch_destroy()
 *
 * Destructor for clutch; called from thread group release code.
 */
void
sched_clutch_destroy(
	__unused sched_clutch_t clutch)
{
	assert(os_atomic_load(&clutch->sc_thr_count, relaxed) == 0);
}

/*
 * sched_clutch_bucket_foreign()
 *
 * Identifies if the clutch bucket is foreign, i.e. not recommended for this
 * hierarchy. This is possible due to the recommended hierarchy/pset not
 * being available for scheduling currently.
 */
static boolean_t
sched_clutch_bucket_foreign(sched_clutch_root_t root_clutch, sched_clutch_bucket_t clutch_bucket)
{
	assert(clutch_bucket->scb_thr_count > 0);
	if (!sched_clutch_pset_available(root_clutch->scr_pset)) {
		/*
		 * Even though the pset was not available for scheduling, threads
		 * are being put in its runq (this might be due to the other pset
		 * being turned off and this being the master processor pset).
		 * Mark the clutch bucket as foreign so that when the other
		 * pset becomes available, it moves the clutch bucket accordingly.
		 */
		return true;
	}
	thread_t thread = run_queue_peek(&clutch_bucket->scb_runq);
	pset_cluster_type_t pset_type = recommended_pset_type(thread);
	return pset_type != root_clutch->scr_pset->pset_cluster_type;
}

/*
 * sched_clutch_bucket_hierarchy_insert()
 *
 * Routine to insert a newly runnable clutch_bucket into the root hierarchy.
 */
static void
sched_clutch_bucket_hierarchy_insert(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp,
	sched_clutch_bucket_options_t options)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		/* Enqueue the timeshare clutch buckets into the global runnable clutch_bucket list; used for sched tick operations */
		enqueue_tail(&root_clutch->scr_clutch_buckets, &clutch_bucket->scb_listlink);
	}

	/* Check if the bucket is a foreign clutch bucket and add it to the foreign buckets list */
	if (sched_clutch_bucket_foreign(root_clutch, clutch_bucket)) {
		clutch_bucket->scb_foreign = true;
		enqueue_tail(&root_clutch->scr_foreign_buckets, &clutch_bucket->scb_foreignlink);
	}

	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];

	/* If this is the first clutch bucket in the root bucket, insert the root bucket into the root priority queue */
	if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_runnable(root_bucket, root_clutch, timestamp);
	}

	/* Insert the clutch bucket into the root bucket run queue with order based on options */
	sched_clutch_bucket_runq_enqueue(&root_bucket->scrb_clutch_buckets, clutch_bucket, options);
	os_atomic_store(&clutch_bucket->scb_root, root_clutch, relaxed);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, clutch_bucket->scb_priority, 0);
}

/*
 * sched_clutch_bucket_hierarchy_remove()
 *
 * Routine to remove an empty clutch bucket from the root hierarchy.
 */
static void
sched_clutch_bucket_hierarchy_remove(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp,
	__unused sched_clutch_bucket_options_t options)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		/* Remove the timeshare clutch bucket from the globally runnable clutch_bucket list */
		remqueue(&clutch_bucket->scb_listlink);
	}
#if __AMP__
	if (clutch_bucket->scb_foreign) {
		clutch_bucket->scb_foreign = false;
		remqueue(&clutch_bucket->scb_foreignlink);
	}
#endif /* __AMP__ */

	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];

	/* Remove the clutch bucket from the root bucket priority queue */
	sched_clutch_bucket_runq_remove(&root_bucket->scrb_clutch_buckets, clutch_bucket);
	os_atomic_store(&clutch_bucket->scb_root, NULL, relaxed);
	clutch_bucket->scb_blocked_ts = timestamp;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0);

	/* If the root bucket priority queue is now empty, remove it from the root priority queue */
	if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_empty(root_bucket, root_clutch, timestamp);
	}
}

/*
 * sched_clutch_bucket_base_pri()
 *
 * Calculates the "base" priority of the clutch bucket. The base
 * priority of the clutch bucket is the sum of the max of highest
 * base_pri and highest sched_pri in the clutch bucket and any
 * grouping specific (App/Daemon...) boosts applicable to the
 * clutch_bucket.
 */
static uint8_t
sched_clutch_bucket_base_pri(
	sched_clutch_bucket_t clutch_bucket)
{
	uint8_t clutch_boost = 0;
	assert(clutch_bucket->scb_runq.count != 0);

	sched_clutch_t clutch = clutch_bucket->scb_clutch;

	/*
	 * Since the clutch bucket can contain threads that are members of the group due
	 * to the sched_pri being promoted or due to their base pri, the base priority of
	 * the entire clutch bucket should be based on the highest thread (promoted or base)
	 * in the clutch bucket.
	 */
	uint8_t max_pri = priority_queue_empty(&clutch_bucket->scb_clutchpri_prioq) ? 0 : priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);

	/*
	 * For all AboveUI clutch buckets and clutch buckets for thread groups that
	 * haven't been specified as SCHED_CLUTCH_TG_PRI_LOW, give a priority boost
	 */
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (os_atomic_load(&clutch->sc_tg_priority, relaxed) != SCHED_CLUTCH_TG_PRI_LOW)) {
		clutch_boost = sched_clutch_bucket_pri_boost;
	}
	return max_pri + clutch_boost;
}

/*
 * sched_clutch_bucket_interactivity_score_calculate()
 *
 * Routine to calculate the interactivity score for the clutch bucket. The
 * interactivity score is based on the ratio of CPU used by all threads in
 * the bucket and the blocked time of the bucket as a whole.
 */
static uint8_t
sched_clutch_bucket_interactivity_score_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		/*
		 * Since the root bucket selection algorithm for Above UI looks at clutch bucket
		 * priorities, make sure all AboveUI buckets are marked interactive.
		 */
		assert(clutch_bucket->scb_interactivity_score == (2 * sched_clutch_bucket_interactive_pri));
		return clutch_bucket->scb_interactivity_score;
	}

	if (clutch_bucket->scb_interactivity_ts == 0) {
		/*
		 * This indicates a newly initialized clutch bucket; return the default interactivity score
		 * and update the timestamp.
		 */
		clutch_bucket->scb_interactivity_ts = timestamp;
		return clutch_bucket->scb_interactivity_score;
	}

	if (timestamp < (clutch_bucket->scb_interactivity_ts + sched_clutch_bucket_interactivity_delta)) {
		return clutch_bucket->scb_interactivity_score;
	}

	/* Check if the clutch bucket accounting needs to be scaled */
	sched_clutch_bucket_cpu_adjust(clutch_bucket);
	clutch_bucket->scb_interactivity_ts = timestamp;

	sched_clutch_bucket_cpu_data_t scb_cpu_data;
	scb_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
	clutch_cpu_data_t cpu_used = scb_cpu_data.cpu_data.scbcd_cpu_used;
	clutch_cpu_data_t cpu_blocked = scb_cpu_data.cpu_data.scbcd_cpu_blocked;

	/*
	 * In extremely CPU contended cases, it is possible that the clutch bucket has been runnable
	 * for a long time but none of its threads have been picked up for execution. In that case, both
	 * the CPU used and blocked would be 0.
	 */
	if ((cpu_blocked == 0) && (cpu_used == 0)) {
		return clutch_bucket->scb_interactivity_score;
	}

	/*
	 * For all timeshare buckets, calculate the interactivity score of the bucket
	 * and add it to the base priority
	 */
	uint8_t interactive_score = 0;
	if (cpu_blocked > cpu_used) {
		/* Interactive clutch_bucket case */
		interactive_score = sched_clutch_bucket_interactive_pri +
		    ((sched_clutch_bucket_interactive_pri * (cpu_blocked - cpu_used)) / cpu_blocked);
	} else {
		/* Non-interactive clutch_bucket case */
		interactive_score = ((sched_clutch_bucket_interactive_pri * cpu_blocked) / cpu_used);
	}
	clutch_bucket->scb_interactivity_score = interactive_score;
	return interactive_score;
}
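
/*
 * Worked example with the default sched_clutch_bucket_interactive_pri of 8:
 * a clutch bucket that blocked for 30ms worth of abstime and used 10ms of CPU
 * is interactive, scoring 8 + ((8 * (30 - 10)) / 30) = 13; one that used 40ms
 * and blocked for 10ms is not, scoring (8 * 10) / 40 = 2.
 */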

/*
 * sched_clutch_bucket_pri_calculate()
 *
 * The priority calculation algorithm for the clutch_bucket is a slight
 * modification on the ULE interactivity score. It uses the base priority
 * of the clutch bucket and applies an interactivity score boost to the
 * highly responsive clutch buckets.
 */
static uint8_t
sched_clutch_bucket_pri_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	/* For empty clutch buckets, return priority 0 */
	if (clutch_bucket->scb_thr_count == 0) {
		return 0;
	}

	uint8_t base_pri = sched_clutch_bucket_base_pri(clutch_bucket);
	uint8_t interactive_score = sched_clutch_bucket_interactivity_score_calculate(clutch_bucket, timestamp);

	assert(((uint64_t)base_pri + interactive_score) <= UINT8_MAX);
	uint8_t pri = base_pri + interactive_score;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_PRI) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, pri, interactive_score, 0);
	return pri;
}
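
/*
 * For example, a clutch bucket whose highest runnable thread is at priority 31
 * in a thread group that is not SCHED_CLUTCH_TG_PRI_LOW has a base priority of
 * 31 + 8 (clutch boost) = 39; with an interactivity score of 13 the clutch
 * bucket priority computed above works out to 52.
 */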

/*
 * sched_clutch_root_bucket_highest_clutch_bucket()
 *
 * Routine to find the highest priority clutch bucket
 * within the root bucket.
 */
static sched_clutch_bucket_t
sched_clutch_root_bucket_highest_clutch_bucket(
	sched_clutch_root_bucket_t root_bucket)
{
	if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
		return NULL;
	}
	return sched_clutch_bucket_runq_peek(&root_bucket->scrb_clutch_buckets);
}

/*
 * sched_clutch_bucket_runnable()
 *
 * Perform all operations needed when a new clutch bucket becomes runnable.
 * It involves inserting the clutch_bucket into the hierarchy and updating the
 * root priority appropriately.
 */
static boolean_t
sched_clutch_bucket_runnable(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp,
	sched_clutch_bucket_options_t options)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_bucket_cpu_blocked_update(clutch_bucket, timestamp);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options);
	/* Update the timesharing properties of this clutch_bucket; also done every sched_tick */
	sched_clutch_bucket_timeshare_update(clutch_bucket);
	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}

/*
 * sched_clutch_bucket_update()
 *
 * Update the clutch_bucket's position in the hierarchy. This routine is
 * called when a new thread is inserted or removed from a runnable clutch
 * bucket. The options specify some properties about the clutch bucket
 * insertion order into the clutch bucket runq.
 */
static boolean_t
sched_clutch_bucket_update(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp,
	sched_clutch_bucket_options_t options)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	uint64_t new_pri = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_bucket_runq_t bucket_runq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;
	if (new_pri == clutch_bucket->scb_priority) {
		/*
		 * If SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR is specified, move the clutch bucket
		 * to the end of the runq. Typically used when a thread is selected for execution
		 * from a clutch bucket.
		 */
		if (options & SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR) {
			sched_clutch_bucket_runq_rotate(bucket_runq, clutch_bucket);
		}
		return false;
	}
	sched_clutch_bucket_runq_remove(bucket_runq, clutch_bucket);
	clutch_bucket->scb_priority = new_pri;
	sched_clutch_bucket_runq_enqueue(bucket_runq, clutch_bucket, options);

	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}

/*
 * sched_clutch_bucket_empty()
 *
 * Perform all the operations needed when a clutch_bucket is no longer runnable.
 * It involves removing the clutch bucket from the hierarchy and updating the root
 * priority appropriately.
 */
static void
sched_clutch_bucket_empty(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp,
	sched_clutch_bucket_options_t options)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_root_pri_update(root_clutch);
}

/*
 * sched_clutch_cpu_usage_update()
 *
 * Routine to update CPU usage of the thread in the hierarchy.
 */
void
sched_clutch_cpu_usage_update(
	thread_t thread,
	uint64_t delta)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
	sched_clutch_bucket_cpu_usage_update(clutch_bucket, delta);
}

/*
 * sched_clutch_bucket_cpu_usage_update()
 *
 * Routine to update the CPU usage of the clutch_bucket.
 */
static void
sched_clutch_bucket_cpu_usage_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t delta)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the Above UI bucket always has the maximum interactivity score, nothing to do here */
		return;
	}

	/*
	 * The CPU usage should not overflow the clutch_cpu_data_t type. Since the usage is used to
	 * calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX.
	 */
	delta = MIN(delta, CLUTCH_CPU_DATA_MAX);
	os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_used), (clutch_cpu_data_t)delta, relaxed);
}

/*
 * sched_clutch_bucket_cpu_blocked_update()
 *
 * Routine to update CPU blocked time for clutch_bucket.
 */
static void
sched_clutch_bucket_cpu_blocked_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (clutch_bucket->scb_blocked_ts == SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID)) {
		/* For the Above UI bucket and a newly initialized clutch bucket, nothing to do here */
		return;
	}

	uint64_t blocked_time = timestamp - clutch_bucket->scb_blocked_ts;
	if (blocked_time > sched_clutch_bucket_adjust_threshold) {
		blocked_time = sched_clutch_bucket_adjust_threshold;
	}

	/*
	 * The CPU blocked time should not overflow the clutch_cpu_data_t type. Since the blocked time
	 * is used to calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX.
	 */
	blocked_time = MIN(blocked_time, CLUTCH_CPU_DATA_MAX);
	clutch_cpu_data_t __assert_only cpu_blocked_orig = os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_blocked), (clutch_cpu_data_t)blocked_time, relaxed);
	/* The blocked time is scaled every so often; it should never overflow */
	assert(blocked_time <= (CLUTCH_CPU_DATA_MAX - cpu_blocked_orig));
}

/*
 * sched_clutch_bucket_cpu_adjust()
 *
 * Routine to scale the cpu usage and blocked time once the sum gets bigger
 * than sched_clutch_bucket_adjust_threshold. Allows the values to remain
 * manageable and maintain the same ratio while allowing clutch buckets to
 * adjust behavior and reflect in the interactivity score in a reasonable
 * amount of time.
 */
static void
sched_clutch_bucket_cpu_adjust(
	sched_clutch_bucket_t clutch_bucket)
{
	sched_clutch_bucket_cpu_data_t old_cpu_data = {};
	sched_clutch_bucket_cpu_data_t new_cpu_data = {};
	do {
		old_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
		clutch_cpu_data_t cpu_used = old_cpu_data.cpu_data.scbcd_cpu_used;
		clutch_cpu_data_t cpu_blocked = old_cpu_data.cpu_data.scbcd_cpu_blocked;
		if ((cpu_used + cpu_blocked) < sched_clutch_bucket_adjust_threshold) {
			return;
		}

		/*
		 * The accumulation of CPU used and blocked is past the threshold; scale it
		 * down to lose old history.
		 */
		new_cpu_data.cpu_data.scbcd_cpu_used = cpu_used / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
		new_cpu_data.cpu_data.scbcd_cpu_blocked = cpu_blocked / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
	} while (!os_atomic_cmpxchg(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, old_cpu_data.scbcd_cpu_data_packed, new_cpu_data.scbcd_cpu_data_packed, relaxed));
}
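
/*
 * Example: once cpu_used + cpu_blocked crosses the (roughly 500ms) adjust
 * threshold, both values are divided by SCHED_CLUTCH_BUCKET_ADJUST_RATIO (10),
 * e.g. 400ms used / 150ms blocked becomes 40ms / 15ms. The used:blocked ratio,
 * and therefore the interactivity score, is preserved while old history decays.
 */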

/*
 * Thread level scheduling algorithm
 *
 * The thread level scheduling algorithm uses the mach timeshare
 * decay based algorithm to achieve sharing between threads within the
 * same clutch bucket. The load/priority shifts etc. are all maintained
 * at the clutch bucket level and used for decay calculation of the
 * threads. The load sampling is still driven off the scheduler tick
 * for runnable clutch buckets (it does not use the new higher frequency
 * EWMA based load calculation). The idea is that the contention and load
 * within clutch_buckets should be limited enough to not see heavy decay
 * and timeshare effectively.
 */

/*
 * sched_clutch_thread_run_bucket_incr() / sched_clutch_run_bucket_incr()
 *
 * Increment the run count for the clutch bucket associated with the
 * thread.
 */
uint32_t
sched_clutch_thread_run_bucket_incr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return 0;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	return sched_clutch_run_bucket_incr(clutch, bucket);
}

static uint32_t
sched_clutch_run_bucket_incr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]);
	uint32_t result = os_atomic_inc(&(clutch_bucket->scb_run_count), relaxed);
	return result;
}

/*
 * sched_clutch_thread_run_bucket_decr() / sched_clutch_run_bucket_decr()
 *
 * Decrement the run count for the clutch bucket associated with the
 * thread.
 */
uint32_t
sched_clutch_thread_run_bucket_decr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return 0;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	return sched_clutch_run_bucket_decr(clutch, bucket);
}

static uint32_t
sched_clutch_run_bucket_decr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]);
	uint32_t result = os_atomic_dec(&(clutch_bucket->scb_run_count), relaxed);
	return result;
}

/*
 * sched_clutch_bucket_timeshare_update()
 *
 * Routine to update the load and priority shift for the clutch_bucket every
 * sched_tick. For runnable clutch_buckets, the sched tick handling code
 * iterates the clutch buckets and calls this routine. For all others, the
 * clutch_bucket maintains a "last updated schedtick" parameter. As threads
 * become runnable in the clutch bucket, if this value is outdated, the load
 * and shifts are updated.
 *
 * Possible optimization:
 * - The current algorithm samples the load every sched tick (125ms).
 *   This is prone to spikes in runnable counts; if that turns out to be
 *   a problem, a simple solution would be to do the EWMA trick to sample
 *   load at every load_tick (30ms) and use the averaged value for the pri
 *   shift calculation.
 */
static void
sched_clutch_bucket_timeshare_update(
	sched_clutch_bucket_t clutch_bucket)
{
	if (clutch_bucket->scb_bucket < TH_BUCKET_SHARE_FG) {
		return;
	}

	/*
	 * Update the timeshare parameters for the clutch bucket if they haven't been updated
	 * in this sched tick.
	 */
	uint32_t bucket_sched_ts = os_atomic_load(&clutch_bucket->scb_timeshare_tick, relaxed);
	uint32_t current_sched_ts = sched_tick;
	if (bucket_sched_ts != current_sched_ts) {
		os_atomic_store(&clutch_bucket->scb_timeshare_tick, current_sched_ts, relaxed);
		uint32_t bucket_load = (os_atomic_load(&clutch_bucket->scb_run_count, relaxed) / processor_avail_count);
		bucket_load = MIN(bucket_load, NRQS - 1);
		uint32_t pri_shift = sched_fixed_shift - sched_load_shifts[bucket_load];
		os_atomic_store(&clutch_bucket->scb_pri_shift, pri_shift, relaxed);
	}
}
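
/*
 * Rough illustration of the pri_shift effect: a thread's timeshare priority
 * depression is approximately (sched_usage >> pri_shift). With few runnable
 * threads per available CPU in the clutch bucket, scb_pri_shift stays large
 * and priorities barely decay; as the runnable count per CPU grows,
 * scb_pri_shift shrinks and the same sched_usage depresses thread priority
 * faster, which is how threads within a clutch bucket timeshare under
 * contention.
 */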

/*
 * sched_clutch_thread_clutch_update()
 *
 * Routine called when the thread changes its thread group. The current
 * implementation relies on the fact that the thread group is changed only
 * from the context of the thread itself. Due to this fact, the thread
 * group change causes only counter updates in the old & new clutch
 * buckets and no hierarchy changes. The routine also attributes the CPU
 * used so far to the old clutch.
 */
void
sched_clutch_thread_clutch_update(
	thread_t thread,
	sched_clutch_t old_clutch,
	sched_clutch_t new_clutch)
{
	uint32_t cpu_delta;
	assert(current_thread() == thread);

	if (old_clutch) {
		sched_clutch_run_bucket_decr(old_clutch, thread->th_sched_bucket);
		/*
		 * Calculate the CPU used by this thread in the old bucket and
		 * add it to the old clutch bucket. This uses the same CPU usage
		 * logic as update_priority etc.
		 */
		thread_timer_delta(thread, cpu_delta);
		if (thread->pri_shift < INT8_MAX) {
			thread->sched_usage += cpu_delta;
		}
		thread->cpu_delta += cpu_delta;
		sched_clutch_bucket_cpu_usage_update(&(old_clutch->sc_clutch_buckets[thread->th_sched_bucket]), cpu_delta);
	}

	if (new_clutch) {
		sched_clutch_run_bucket_incr(new_clutch, thread->th_sched_bucket);
	}
}
/* Thread Insertion/Removal/Selection routines */

/*
 * sched_clutch_thread_insert()
 *
 * Routine to insert a thread into the sched clutch hierarchy.
 * Update the counts at all levels of the hierarchy and insert the nodes
 * as they become runnable. Always called with the pset lock held.
 */
static boolean_t
sched_clutch_thread_insert(
    sched_clutch_root_t root_clutch,
    thread_t thread,
    integer_t options)
{
    boolean_t result = FALSE;

    sched_clutch_hierarchy_locked_assert(root_clutch);
    sched_clutch_t clutch = sched_clutch_for_thread(thread);
    assert(thread->thread_group == clutch->sc_tg);

    uint64_t current_timestamp = mach_absolute_time();
    sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
    assert((clutch_bucket->scb_root == NULL) || (clutch_bucket->scb_root == root_clutch));

    /* Insert thread into the clutch_bucket runq using sched_pri */
    run_queue_enqueue(&clutch_bucket->scb_runq, thread, options);
    /* Increment the urgency counter for the root if necessary */
    sched_clutch_root_urgency_inc(root_clutch, thread);

    /* Insert thread into clutch_bucket priority queue based on the promoted or base priority */
    priority_queue_insert(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link,
        sched_thread_sched_pri_promoted(thread) ? thread->sched_pri : thread->base_pri,
        PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
    os_atomic_inc(&clutch->sc_thr_count, relaxed);
    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE,
        thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_RUNNABLE, 0);

    /* Enqueue the clutch into the hierarchy (if needed) and update properties; pick the insertion order based on thread options */
    sched_clutch_bucket_options_t scb_options = (options & SCHED_HEADQ) ? SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ : SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ;
    if (clutch_bucket->scb_thr_count == 0) {
        sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count);
        sched_clutch_thr_count_inc(&root_clutch->scr_thr_count);
        result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp, scb_options);
    } else {
        sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count);
        sched_clutch_thr_count_inc(&root_clutch->scr_thr_count);
        result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, scb_options);
    }
    return result;
}

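/*
 * A thread inserted above ends up in two orderings at once: the clutch
 * bucket's run queue is keyed on sched_pri (this is what thread selection
 * peeks at), while scb_clutchpri_prioq is keyed on the promoted or base
 * priority and is used to derive the clutch bucket's own priority within the
 * hierarchy. The thr_count updates decide whether the clutch bucket itself has
 * to be made runnable in the root hierarchy or merely has its properties
 * refreshed.
 */
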
/*
 * sched_clutch_thread_remove()
 *
 * Routine to remove a thread from the sched clutch hierarchy.
 * Update the counts at all levels of the hierarchy and remove the nodes
 * as they become empty. Always called with the pset lock held.
 */
static void
sched_clutch_thread_remove(
    sched_clutch_root_t root_clutch,
    thread_t thread,
    uint64_t current_timestamp,
    sched_clutch_bucket_options_t options)
{
    sched_clutch_hierarchy_locked_assert(root_clutch);
    sched_clutch_t clutch = sched_clutch_for_thread(thread);
    assert(thread->thread_group == clutch->sc_tg);
    assert(thread->runq != PROCESSOR_NULL);

    sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
    assert(clutch_bucket->scb_root == root_clutch);

    /* Decrement the urgency counter for the root if necessary */
    sched_clutch_root_urgency_dec(root_clutch, thread);
    /* Remove thread from the clutch_bucket */
    run_queue_remove(&clutch_bucket->scb_runq, thread);

    priority_queue_remove(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link,
        PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE,
        thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_EMPTY, 0);

    /* Update counts at various levels of the hierarchy */
    os_atomic_dec(&clutch->sc_thr_count, relaxed);
    sched_clutch_thr_count_dec(&root_clutch->scr_thr_count);
    sched_clutch_thr_count_dec(&clutch_bucket->scb_thr_count);

    /* Remove the clutch from hierarchy (if needed) and update properties */
    if (clutch_bucket->scb_thr_count == 0) {
        sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp, options);
    } else {
        sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, options);
    }
}

/*
 * sched_clutch_thread_highest()
 *
 * Routine to find and remove the highest priority thread
 * from the sched clutch hierarchy. The algorithm looks at the
 * hierarchy for the most eligible runnable thread and calls
 * sched_clutch_thread_remove(). Always called with the
 * pset lock held.
 */
static thread_t
sched_clutch_thread_highest(
    sched_clutch_root_t root_clutch)
{
    sched_clutch_hierarchy_locked_assert(root_clutch);
    uint64_t current_timestamp = mach_absolute_time();

    /* Select the highest priority root bucket */
    sched_clutch_root_bucket_t root_bucket = sched_clutch_root_highest_root_bucket(root_clutch, current_timestamp);
    if (root_bucket == NULL) {
        return THREAD_NULL;
    }
    /* Since a thread is being picked from this root bucket, update its deadline */
    sched_clutch_root_bucket_deadline_update(root_bucket, root_clutch, current_timestamp);

    /* Find the highest priority clutch bucket in this root bucket */
    sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket);
    assert(clutch_bucket != NULL);

    /* Find the highest priority runnable thread in this clutch bucket */
    thread_t thread = run_queue_peek(&clutch_bucket->scb_runq);
    assert(thread != NULL);

    /* Remove and return the thread from the hierarchy; also round robin the clutch bucket if the priority remains unchanged */
    sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR);
    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_SELECT) | DBG_FUNC_NONE,
        thread_tid(thread), thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, 0, 0);
    return thread;
}

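/*
 * The deadline update above is what rotates selection among root buckets:
 * picking a thread from a root bucket pushes that bucket's deadline out (see
 * sched_clutch_root_bucket_deadline_calculate() and the WCEL values set up in
 * sched_clutch_timebase_init()), which is intended to keep lower scheduling
 * buckets from being starved indefinitely under sustained higher-bucket load.
 */
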
/* High level global accessor routines */

/*
 * sched_clutch_root_urgency()
 *
 * Routine to get the urgency of the highest runnable
 * thread in the hierarchy.
 */
static uint32_t
sched_clutch_root_urgency(
    sched_clutch_root_t root_clutch)
{
    return root_clutch->scr_urgency;
}

/*
 * sched_clutch_root_count_sum()
 *
 * The count_sum mechanism is used for scheduler runq
 * statistics calculation. It's only useful for debugging
 * purposes; since it takes a mach_absolute_time() on
 * other scheduler implementations, it's better to avoid
 * populating this until absolutely necessary.
 */
static uint32_t
sched_clutch_root_count_sum(
    __unused sched_clutch_root_t root_clutch)
{
    return 0;
}

/*
 * sched_clutch_root_priority()
 *
 * Routine to get the priority of the highest runnable
 * thread in the hierarchy.
 */
static int
sched_clutch_root_priority(
    sched_clutch_root_t root_clutch)
{
    return root_clutch->scr_priority;
}

/*
 * sched_clutch_root_count()
 *
 * Returns total number of runnable threads in the hierarchy.
 */
uint32_t
sched_clutch_root_count(
    sched_clutch_root_t root_clutch)
{
    return root_clutch->scr_thr_count;
}

/*
 * sched_clutch_thread_pri_shift()
 *
 * Routine to get the priority shift value for a thread.
 * Since the timesharing is done at the clutch_bucket level,
 * this routine gets the clutch_bucket and retrieves the
 * values from there.
 */
uint32_t
sched_clutch_thread_pri_shift(
    thread_t thread,
    sched_bucket_t bucket)
{
    if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
        return INT8_MAX;
    }
    assert(bucket != TH_BUCKET_RUN);
    sched_clutch_t clutch = sched_clutch_for_thread(thread);
    sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]);
    return os_atomic_load(&clutch_bucket->scb_pri_shift, relaxed);
}

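/*
 * The value returned here feeds the same timeshare decay computation as the
 * other schedulers (roughly sched_pri = base_pri - (sched_usage >> pri_shift)),
 * except that the shift is derived from the load on the clutch bucket rather
 * than from a global bucket load. A larger shift therefore means the thread's
 * accumulated usage depresses its priority more slowly.
 */
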
#pragma mark -- Clutch Scheduler Algorithm

static void
sched_clutch_init(void);

static void
sched_clutch_timebase_init(void);

static thread_t
sched_clutch_steal_thread(processor_set_t pset);

static void
sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context);

static boolean_t
sched_clutch_processor_enqueue(processor_t processor, thread_t thread,
    sched_options_t options);

static boolean_t
sched_clutch_processor_queue_remove(processor_t processor, thread_t thread);

static ast_t
sched_clutch_processor_csw_check(processor_t processor);

static boolean_t
sched_clutch_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);

static int
sched_clutch_runq_count(processor_t processor);

static boolean_t
sched_clutch_processor_queue_empty(processor_t processor);

static uint64_t
sched_clutch_runq_stats_count_sum(processor_t processor);

static int
sched_clutch_processor_bound_count(processor_t processor);

static void
sched_clutch_pset_init(processor_set_t pset);

static void
sched_clutch_processor_init(processor_t processor);

static thread_t
sched_clutch_choose_thread(processor_t processor, int priority, ast_t reason);

static void
sched_clutch_processor_queue_shutdown(processor_t processor);

static sched_mode_t
sched_clutch_initial_thread_sched_mode(task_t parent_task);

static uint32_t
sched_clutch_initial_quantum_size(thread_t thread);

static bool
sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread);

static uint32_t
sched_clutch_run_incr(thread_t thread);

static uint32_t
sched_clutch_run_decr(thread_t thread);

static void
sched_clutch_update_thread_bucket(thread_t thread);

const struct sched_dispatch_table sched_clutch_dispatch = {
    .sched_name                     = "clutch",
    .init                           = sched_clutch_init,
    .timebase_init                  = sched_clutch_timebase_init,
    .processor_init                 = sched_clutch_processor_init,
    .pset_init                      = sched_clutch_pset_init,
    .maintenance_continuation       = sched_timeshare_maintenance_continue,
    .choose_thread                  = sched_clutch_choose_thread,
    .steal_thread_enabled           = sched_steal_thread_enabled,
    .steal_thread                   = sched_clutch_steal_thread,
    .compute_timeshare_priority     = sched_compute_timeshare_priority,
    .choose_processor               = choose_processor,
    .processor_enqueue              = sched_clutch_processor_enqueue,
    .processor_queue_shutdown       = sched_clutch_processor_queue_shutdown,
    .processor_queue_remove         = sched_clutch_processor_queue_remove,
    .processor_queue_empty          = sched_clutch_processor_queue_empty,
    .priority_is_urgent             = priority_is_urgent,
    .processor_csw_check            = sched_clutch_processor_csw_check,
    .processor_queue_has_priority   = sched_clutch_processor_queue_has_priority,
    .initial_quantum_size           = sched_clutch_initial_quantum_size,
    .initial_thread_sched_mode      = sched_clutch_initial_thread_sched_mode,
    .can_update_priority            = can_update_priority,
    .update_priority                = update_priority,
    .lightweight_update_priority    = lightweight_update_priority,
    .quantum_expire                 = sched_default_quantum_expire,
    .processor_runq_count           = sched_clutch_runq_count,
    .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum,
    .processor_bound_count          = sched_clutch_processor_bound_count,
    .thread_update_scan             = sched_clutch_thread_update_scan,
    .multiple_psets_enabled         = TRUE,
    .sched_groups_enabled           = FALSE,
    .avoid_processor_enabled        = TRUE,
    .thread_avoid_processor         = sched_clutch_thread_avoid_processor,
    .processor_balance              = sched_SMT_balance,

    .rt_runq                        = sched_rtglobal_runq,
    .rt_init                        = sched_rtglobal_init,
    .rt_queue_shutdown              = sched_rtglobal_queue_shutdown,
    .rt_runq_scan                   = sched_rtglobal_runq_scan,
    .rt_runq_count_sum              = sched_rtglobal_runq_count_sum,

    .qos_max_parallelism            = sched_qos_max_parallelism,
    .check_spill                    = sched_check_spill,
    .ipi_policy                     = sched_ipi_policy,
    .thread_should_yield            = sched_thread_should_yield,
    .run_count_incr                 = sched_clutch_run_incr,
    .run_count_decr                 = sched_clutch_run_decr,
    .update_thread_bucket           = sched_clutch_update_thread_bucket,
    .pset_made_schedulable          = sched_pset_made_schedulable,
};

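/*
 * Most of the policy-neutral entry points above (timeshare priority
 * computation, quantum expiry, RT runqueue handling, IPI policy, etc.) are
 * shared with the existing timeshare scheduler; only the runqueue management
 * and thread selection paths are clutch specific. That is what lets the clutch
 * scheduler be selected as a drop-in replacement through the dispatch table.
 */
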
__attribute__((always_inline))
static inline run_queue_t
sched_clutch_bound_runq(processor_t processor)
{
    return &processor->runq;
}

__attribute__((always_inline))
static inline sched_clutch_root_t
sched_clutch_processor_root_clutch(processor_t processor)
{
    return &processor->processor_set->pset_clutch_root;
}

__attribute__((always_inline))
static inline run_queue_t
sched_clutch_thread_bound_runq(processor_t processor, __assert_only thread_t thread)
{
    assert(thread->bound_processor == processor);
    return sched_clutch_bound_runq(processor);
}

static uint32_t
sched_clutch_initial_quantum_size(thread_t thread)
{
    if (thread == THREAD_NULL) {
        return std_quantum;
    }
    assert(sched_clutch_thread_quantum[thread->th_sched_bucket] <= UINT32_MAX);
    return (uint32_t)sched_clutch_thread_quantum[thread->th_sched_bucket];
}

static sched_mode_t
sched_clutch_initial_thread_sched_mode(task_t parent_task)
{
    if (parent_task == kernel_task) {
        return TH_MODE_FIXED;
    } else {
        return TH_MODE_TIMESHARE;
    }
}

static void
sched_clutch_processor_init(processor_t processor)
{
    run_queue_init(&processor->runq);
}

static void
sched_clutch_pset_init(processor_set_t pset)
{
    sched_clutch_root_init(&pset->pset_clutch_root, pset);
}

static void
sched_clutch_init(void)
{
    if (!PE_parse_boot_argn("sched_clutch_bucket_interactive_pri", &sched_clutch_bucket_interactive_pri, sizeof(sched_clutch_bucket_interactive_pri))) {
        sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT;
    }
    if (!PE_parse_boot_argn("sched_clutch_bucket_pri_boost", &sched_clutch_bucket_pri_boost, sizeof(sched_clutch_bucket_pri_boost))) {
        sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT;
    }
    sched_timeshare_init();
}

static void
sched_clutch_timebase_init(void)
{
    sched_timeshare_timebase_init();
    sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel);
    sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp);
    sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum);
    clock_interval_to_absolutetime_interval(SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS,
        NSEC_PER_USEC, &sched_clutch_bucket_adjust_threshold);

    uint32_t interactivity_delta = 0;
    if (!PE_parse_boot_argn("sched_clutch_bucket_interactivity_delta_usecs", &interactivity_delta, sizeof(interactivity_delta))) {
        interactivity_delta = SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT;
    }
    clock_interval_to_absolutetime_interval(interactivity_delta, NSEC_PER_USEC, &sched_clutch_bucket_interactivity_delta);
}

static thread_t
sched_clutch_choose_thread(
    processor_t processor,
    int priority,
    __unused ast_t reason)
{
    int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));
    uint32_t clutch_count = sched_clutch_root_count(sched_clutch_processor_root_clutch(processor));
    run_queue_t bound_runq = sched_clutch_bound_runq(processor);
    boolean_t choose_from_boundq = false;

    if (bound_runq->highq < priority &&
        clutch_pri < priority) {
        return THREAD_NULL;
    }

    if (bound_runq->count && clutch_count) {
        if (bound_runq->highq >= clutch_pri) {
            choose_from_boundq = true;
        }
    } else if (bound_runq->count) {
        choose_from_boundq = true;
    } else if (clutch_count) {
        choose_from_boundq = false;
    } else {
        return THREAD_NULL;
    }

    thread_t thread = THREAD_NULL;
    if (choose_from_boundq == false) {
        sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
        thread = sched_clutch_thread_highest(pset_clutch_root);
    } else {
        thread = run_queue_dequeue(bound_runq, SCHED_HEADQ);
    }
    return thread;
}

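/*
 * Selection summary for the routine above: the bound runqueue only wins when
 * it is non-empty and its highest priority is at least as high as the clutch
 * root priority. For example, with a bound kernel thread queued at priority 80
 * and a clutch root priority of 47, the bound runqueue is dequeued first; with
 * the priorities reversed, the clutch hierarchy supplies the thread.
 */
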
static boolean_t
sched_clutch_processor_enqueue(
    processor_t processor,
    thread_t thread,
    sched_options_t options)
{
    boolean_t result;

    thread->runq = processor;
    if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
        sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
        result = sched_clutch_thread_insert(pset_clutch_root, thread, options);
    } else {
        run_queue_t rq = sched_clutch_thread_bound_runq(processor, thread);
        result = run_queue_enqueue(rq, thread, options);
    }
    return result;
}

static boolean_t
sched_clutch_processor_queue_empty(processor_t processor)
{
    return sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0 &&
           sched_clutch_bound_runq(processor)->count == 0;
}

static ast_t
sched_clutch_processor_csw_check(processor_t processor)
{
    boolean_t has_higher;
    int pri;

    if (sched_clutch_thread_avoid_processor(processor, current_thread())) {
        return AST_PREEMPT | AST_URGENT;
    }

    run_queue_t bound_runq = sched_clutch_bound_runq(processor);
    int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));

    assert(processor->active_thread != NULL);

    pri = MAX(clutch_pri, bound_runq->highq);

    if (processor->first_timeslice) {
        has_higher = (pri > processor->current_pri);
    } else {
        has_higher = (pri >= processor->current_pri);
    }

    if (has_higher) {
        if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) {
            return AST_PREEMPT | AST_URGENT;
        }

        if (bound_runq->urgency > 0) {
            return AST_PREEMPT | AST_URGENT;
        }

        return AST_PREEMPT;
    }

    return AST_NONE;
}

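/*
 * The first_timeslice check above implements the usual preemption rule: a
 * thread still in its first quantum is only preempted by a strictly higher
 * priority (pri > current_pri), while a thread past its first quantum also
 * yields to an equal priority thread (pri >= current_pri), giving
 * same-priority threads a chance to round robin.
 */
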
static boolean_t
sched_clutch_processor_queue_has_priority(processor_t processor,
    int priority,
    boolean_t gte)
{
    run_queue_t bound_runq = sched_clutch_bound_runq(processor);

    int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq);

    if (gte) {
        return qpri >= priority;
    } else {
        return qpri > priority;
    }
}

static int
sched_clutch_runq_count(processor_t processor)
{
    return (int)sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) + sched_clutch_bound_runq(processor)->count;
}

static uint64_t
sched_clutch_runq_stats_count_sum(processor_t processor)
{
    uint64_t bound_sum = sched_clutch_bound_runq(processor)->runq_stats.count_sum;

    if (processor->cpu_id == processor->processor_set->cpu_set_low) {
        return bound_sum + sched_clutch_root_count_sum(sched_clutch_processor_root_clutch(processor));
    }
    return bound_sum;
}

static int
sched_clutch_processor_bound_count(processor_t processor)
{
    return sched_clutch_bound_runq(processor)->count;
}

static void
sched_clutch_processor_queue_shutdown(processor_t processor)
{
    processor_set_t pset = processor->processor_set;
    sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
    thread_t thread;
    queue_head_t tqueue;

    /* We only need to migrate threads if this is the last active processor in the pset */
    if (pset->online_processor_count > 0) {
        pset_unlock(pset);
        return;
    }

    queue_init(&tqueue);
    while (sched_clutch_root_count(pset_clutch_root) > 0) {
        thread = sched_clutch_thread_highest(pset_clutch_root);
        enqueue_tail(&tqueue, &thread->runq_links);
    }

    pset_unlock(pset);

    qe_foreach_element_safe(thread, &tqueue, runq_links) {
        remqueue(&thread->runq_links);

        thread_lock(thread);

        thread_setrun(thread, SCHED_TAILQ);

        thread_unlock(thread);
    }
}

static boolean_t
sched_clutch_processor_queue_remove(
    processor_t processor,
    thread_t thread)
{
    run_queue_t rq;
    processor_set_t pset = processor->processor_set;

    pset_lock(pset);

    if (processor == thread->runq) {
        /*
         * Thread is on a run queue and we have a lock on
         * that run queue.
         */
        if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
            sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
            sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time(), SCHED_CLUTCH_BUCKET_OPTIONS_NONE);
        } else {
            rq = sched_clutch_thread_bound_runq(processor, thread);
            run_queue_remove(rq, thread);
        }
    } else {
        /*
         * The thread left the run queue before we could
         * lock the run queue.
         */
        assert(thread->runq == PROCESSOR_NULL);
        processor = PROCESSOR_NULL;
    }

    pset_unlock(pset);

    return processor != PROCESSOR_NULL;
}

static thread_t
sched_clutch_steal_thread(processor_set_t pset)
{
    processor_set_t nset, cset = pset;
    thread_t thread;

    do {
        sched_clutch_root_t pset_clutch_root = &cset->pset_clutch_root;
        if (sched_clutch_root_count(pset_clutch_root) > 0) {
            thread = sched_clutch_thread_highest(pset_clutch_root);
            pset_unlock(cset);
            return thread;
        }

        nset = next_pset(cset);

        if (nset != pset) {
            pset_unlock(cset);
            cset = nset;
            pset_lock(cset);
        }
    } while (nset != pset);

    pset_unlock(cset);

    return THREAD_NULL;
}

static void
sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context)
{
    boolean_t restart_needed = FALSE;
    processor_t processor = processor_list;
    processor_set_t pset;
    thread_t thread;
    spl_t s;

    /*
     *  We update the threads associated with each processor (bound and idle threads)
     *  and then update the threads in each pset runqueue.
     */

    do {
        do {
            pset = processor->processor_set;

            s = splsched();
            pset_lock(pset);

            restart_needed = runq_scan(sched_clutch_bound_runq(processor), scan_context);

            pset_unlock(pset);
            splx(s);

            if (restart_needed) {
                break;
            }

            thread = processor->idle_thread;
            if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
                if (thread_update_add_thread(thread) == FALSE) {
                    restart_needed = TRUE;
                    break;
                }
            }
        } while ((processor = processor->processor_list) != NULL);

        /* Ok, we now have a collection of candidates -- fix them. */
        thread_update_process_threads();
    } while (restart_needed);

    pset = &pset0;

    do {
        do {
            s = splsched();
            pset_lock(pset);

            if (sched_clutch_root_count(&pset->pset_clutch_root) > 0) {
                queue_t clutch_bucket_list = &pset->pset_clutch_root.scr_clutch_buckets;
                sched_clutch_bucket_t clutch_bucket;
                qe_foreach_element(clutch_bucket, clutch_bucket_list, scb_listlink) {
                    sched_clutch_bucket_timeshare_update(clutch_bucket);
                    restart_needed = runq_scan(&clutch_bucket->scb_runq, scan_context);
                    if (restart_needed) {
                        break;
                    }
                }
            }

            pset_unlock(pset);
            splx(s);

            if (restart_needed) {
                break;
            }
        } while ((pset = pset->pset_list) != NULL);

        /* Ok, we now have a collection of candidates -- fix them. */
        thread_update_process_threads();
    } while (restart_needed);
}

extern int sched_allow_rt_smt;

/* Return true if this thread should not continue running on this processor */
static bool
sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread)
{
    if (processor->processor_primary != processor) {
        /*
         * This is a secondary SMT processor. If the primary is running
         * a realtime thread, only allow realtime threads on the secondary.
         */
        if ((processor->processor_primary->current_pri >= BASEPRI_RTQUEUES) && ((thread->sched_pri < BASEPRI_RTQUEUES) || !sched_allow_rt_smt)) {
            return true;
        }
    }

    return false;
}

/*
 * For the clutch scheduler, the run counts are maintained in the clutch
 * buckets (i.e. thread group scheduling structure).
 */
static uint32_t
sched_clutch_run_incr(thread_t thread)
{
    assert((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN);
    uint32_t new_count = os_atomic_inc(&sched_run_buckets[TH_BUCKET_RUN], relaxed);
    sched_clutch_thread_run_bucket_incr(thread, thread->th_sched_bucket);
    return new_count;
}

static uint32_t
sched_clutch_run_decr(thread_t thread)
{
    assert((thread->state & (TH_RUN | TH_IDLE)) != TH_RUN);
    uint32_t new_count = os_atomic_dec(&sched_run_buckets[TH_BUCKET_RUN], relaxed);
    sched_clutch_thread_run_bucket_decr(thread, thread->th_sched_bucket);
    return new_count;
}

static sched_bucket_t
sched_convert_pri_to_bucket(uint8_t priority)
{
    sched_bucket_t bucket = TH_BUCKET_RUN;

    if (priority > BASEPRI_USER_INITIATED) {
        bucket = TH_BUCKET_SHARE_FG;
    } else if (priority > BASEPRI_DEFAULT) {
        bucket = TH_BUCKET_SHARE_IN;
    } else if (priority > BASEPRI_UTILITY) {
        bucket = TH_BUCKET_SHARE_DF;
    } else if (priority > MAXPRI_THROTTLE) {
        bucket = TH_BUCKET_SHARE_UT;
    } else {
        bucket = TH_BUCKET_SHARE_BG;
    }
    return bucket;
}

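/*
 * The mapping above mirrors the QoS bands: priorities above
 * BASEPRI_USER_INITIATED land in TH_BUCKET_SHARE_FG, above BASEPRI_DEFAULT in
 * TH_BUCKET_SHARE_IN, above BASEPRI_UTILITY in TH_BUCKET_SHARE_DF, above
 * MAXPRI_THROTTLE in TH_BUCKET_SHARE_UT, and everything else in
 * TH_BUCKET_SHARE_BG. Realtime threads and high priority fixed-pri threads are
 * instead placed in TH_BUCKET_FIXPRI by sched_clutch_update_thread_bucket()
 * below.
 */
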
/*
 * For threads that have changed sched_pri without changing the
 * base_pri for any reason other than decay, use the sched_pri
 * as the bucketizing priority instead of base_pri. All such
 * changes are typically due to kernel locking primitives boosts
 * or demotions.
 */
static boolean_t
sched_thread_sched_pri_promoted(thread_t thread)
{
    return (thread->sched_flags & TH_SFLAG_PROMOTED) ||
           (thread->sched_flags & TH_SFLAG_PROMOTE_REASON_MASK) ||
           (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) ||
           (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) ||
           (thread->kern_promotion_schedpri != 0);
}

/*
 * Routine to update the scheduling bucket for the thread.
 *
 * In the clutch scheduler implementation, the thread's bucket
 * is based on sched_pri if it was promoted due to a kernel
 * primitive; otherwise it's based on the thread base_pri. This
 * enhancement allows promoted threads to reach a higher priority
 * bucket and potentially get selected sooner for scheduling.
 *
 * Also, the clutch scheduler does not honor fixed priority below
 * FG priority. It simply puts those threads in the corresponding
 * timeshare bucket. The reason for doing that is that it is
 * extremely hard to define the scheduling properties of such threads,
 * and they typically lead to performance issues.
 */
static void
sched_clutch_update_thread_bucket(thread_t thread)
{
    sched_bucket_t old_bucket = thread->th_sched_bucket;
    sched_bucket_t new_bucket = TH_BUCKET_RUN;
    assert(thread->runq == PROCESSOR_NULL);

    int pri = (sched_thread_sched_pri_promoted(thread)) ? thread->sched_pri : thread->base_pri;

    switch (thread->sched_mode) {
    case TH_MODE_FIXED:
        if (pri >= BASEPRI_FOREGROUND) {
            new_bucket = TH_BUCKET_FIXPRI;
        } else {
            new_bucket = sched_convert_pri_to_bucket(pri);
        }
        break;

    case TH_MODE_REALTIME:
        new_bucket = TH_BUCKET_FIXPRI;
        break;

    case TH_MODE_TIMESHARE:
        new_bucket = sched_convert_pri_to_bucket(pri);
        break;

    default:
        panic("unexpected mode: %d", thread->sched_mode);
        break;
    }

    if (old_bucket == new_bucket) {
        return;
    }

    thread->th_sched_bucket = new_bucket;
    thread->pri_shift = sched_clutch_thread_pri_shift(thread, new_bucket);

    /*
     * Since this is called after the thread has been removed from the runq,
     * only the run counts need to be updated. The re-insert into the runq
     * would put the thread into the correct new bucket's runq.
     */
    if ((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN) {
        sched_clutch_thread_run_bucket_decr(thread, old_bucket);
        sched_clutch_thread_run_bucket_incr(thread, new_bucket);
    }
}

#if __AMP__

/* Implementation of the AMP version of the clutch scheduler */

static thread_t
sched_clutch_amp_steal_thread(processor_set_t pset);

static ast_t
sched_clutch_amp_processor_csw_check(processor_t processor);

static boolean_t
sched_clutch_amp_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);

static boolean_t
sched_clutch_amp_processor_queue_empty(processor_t processor);

static thread_t
sched_clutch_amp_choose_thread(processor_t processor, int priority, ast_t reason);

static void
sched_clutch_amp_processor_queue_shutdown(processor_t processor);

static processor_t
sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread);

static bool
sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread);

static bool
sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread);

static void
sched_clutch_migrate_foreign_buckets(processor_t processor, processor_set_t dst_pset, boolean_t drop_lock);

static void
sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation);

const struct sched_dispatch_table sched_clutch_amp_dispatch = {
    .sched_name                     = "clutch_amp",
    .init                           = sched_amp_init,
    .timebase_init                  = sched_clutch_timebase_init,
    .processor_init                 = sched_clutch_processor_init,
    .pset_init                      = sched_clutch_pset_init,
    .maintenance_continuation       = sched_timeshare_maintenance_continue,
    .choose_thread                  = sched_clutch_amp_choose_thread,
    .steal_thread_enabled           = sched_amp_steal_thread_enabled,
    .steal_thread                   = sched_clutch_amp_steal_thread,
    .compute_timeshare_priority     = sched_compute_timeshare_priority,
    .choose_processor               = sched_clutch_amp_choose_processor,
    .processor_enqueue              = sched_clutch_processor_enqueue,
    .processor_queue_shutdown       = sched_clutch_amp_processor_queue_shutdown,
    .processor_queue_remove         = sched_clutch_processor_queue_remove,
    .processor_queue_empty          = sched_clutch_amp_processor_queue_empty,
    .priority_is_urgent             = priority_is_urgent,
    .processor_csw_check            = sched_clutch_amp_processor_csw_check,
    .processor_queue_has_priority   = sched_clutch_amp_processor_queue_has_priority,
    .initial_quantum_size           = sched_clutch_initial_quantum_size,
    .initial_thread_sched_mode      = sched_clutch_initial_thread_sched_mode,
    .can_update_priority            = can_update_priority,
    .update_priority                = update_priority,
    .lightweight_update_priority    = lightweight_update_priority,
    .quantum_expire                 = sched_default_quantum_expire,
    .processor_runq_count           = sched_clutch_runq_count,
    .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum,
    .processor_bound_count          = sched_clutch_processor_bound_count,
    .thread_update_scan             = sched_clutch_thread_update_scan,
    .multiple_psets_enabled         = TRUE,
    .sched_groups_enabled           = FALSE,
    .avoid_processor_enabled        = TRUE,
    .thread_avoid_processor         = sched_clutch_amp_thread_avoid_processor,
    .processor_balance              = sched_amp_balance,

    .rt_runq                        = sched_amp_rt_runq,
    .rt_init                        = sched_amp_rt_init,
    .rt_queue_shutdown              = sched_amp_rt_queue_shutdown,
    .rt_runq_scan                   = sched_amp_rt_runq_scan,
    .rt_runq_count_sum              = sched_amp_rt_runq_count_sum,

    .qos_max_parallelism            = sched_amp_qos_max_parallelism,
    .check_spill                    = sched_amp_check_spill,
    .ipi_policy                     = sched_amp_ipi_policy,
    .thread_should_yield            = sched_clutch_amp_thread_should_yield,
    .run_count_incr                 = sched_clutch_run_incr,
    .run_count_decr                 = sched_clutch_run_decr,
    .update_thread_bucket           = sched_clutch_update_thread_bucket,
    .pset_made_schedulable          = sched_clutch_migrate_foreign_buckets,
    .thread_group_recommendation_change = sched_clutch_amp_thread_group_recommendation_change,
};

extern processor_set_t ecore_set;
extern processor_set_t pcore_set;

static thread_t
sched_clutch_amp_choose_thread(
    processor_t processor,
    int priority,
    __unused ast_t reason)
{
    processor_set_t pset = processor->processor_set;
    bool spill_pending = false;
    int spill_pri = -1;

    if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
        spill_pending = true;
        spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root);
    }

    int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));
    run_queue_t bound_runq = sched_clutch_bound_runq(processor);
    boolean_t choose_from_boundq = false;

    if ((bound_runq->highq < priority) &&
        (clutch_pri < priority) &&
        (spill_pri < priority)) {
        return THREAD_NULL;
    }

    if ((spill_pri > bound_runq->highq) &&
        (spill_pri > clutch_pri)) {
        /*
         * There is a higher priority thread on the P-core runq,
         * so returning THREAD_NULL here will cause thread_select()
         * to call sched_clutch_amp_steal_thread() to try to get it.
         */
        return THREAD_NULL;
    }

    if (bound_runq->highq >= clutch_pri) {
        choose_from_boundq = true;
    }

    thread_t thread = THREAD_NULL;
    if (choose_from_boundq == false) {
        sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
        thread = sched_clutch_thread_highest(pset_clutch_root);
    } else {
        thread = run_queue_dequeue(bound_runq, SCHED_HEADQ);
    }
    return thread;
}

static boolean_t
sched_clutch_amp_processor_queue_empty(processor_t processor)
{
    processor_set_t pset = processor->processor_set;
    bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id);

    return (sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0) &&
           (sched_clutch_bound_runq(processor)->count == 0) &&
           !spill_pending;
}

static bool
sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread)
{
    if (!sched_clutch_amp_processor_queue_empty(processor) || (rt_runq_count(processor->processor_set) > 0)) {
        return true;
    }

    if ((processor->processor_set->pset_cluster_type == PSET_AMP_E) && (recommended_pset_type(thread) == PSET_AMP_P)) {
        return sched_clutch_root_count(&pcore_set->pset_clutch_root) > 0;
    }

    return false;
}

static ast_t
sched_clutch_amp_processor_csw_check(processor_t processor)
{
    boolean_t has_higher;
    int pri;

    int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));
    run_queue_t bound_runq = sched_clutch_bound_runq(processor);

    assert(processor->active_thread != NULL);

    processor_set_t pset = processor->processor_set;
    bool spill_pending = false;
    int spill_pri = -1;
    int spill_urgency = 0;

    if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
        spill_pending = true;
        spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root);
        spill_urgency = sched_clutch_root_urgency(&pcore_set->pset_clutch_root);
    }

    pri = MAX(clutch_pri, bound_runq->highq);
    if (spill_pending) {
        pri = MAX(pri, spill_pri);
    }

    if (processor->first_timeslice) {
        has_higher = (pri > processor->current_pri);
    } else {
        has_higher = (pri >= processor->current_pri);
    }

    if (has_higher) {
        if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) {
            return AST_PREEMPT | AST_URGENT;
        }

        if (bound_runq->urgency > 0) {
            return AST_PREEMPT | AST_URGENT;
        }

        if (spill_urgency > 0) {
            return AST_PREEMPT | AST_URGENT;
        }

        return AST_PREEMPT;
    }

    return AST_NONE;
}

static boolean_t
sched_clutch_amp_processor_queue_has_priority(processor_t processor,
    int priority,
    boolean_t gte)
{
    bool spill_pending = false;
    int spill_pri = -1;
    processor_set_t pset = processor->processor_set;

    if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
        spill_pending = true;
        spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root);
    }
    run_queue_t bound_runq = sched_clutch_bound_runq(processor);

    int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq);
    if (spill_pending) {
        qpri = MAX(qpri, spill_pri);
    }

    if (gte) {
        return qpri >= priority;
    } else {
        return qpri > priority;
    }
}

/*
 * sched_clutch_hierarchy_thread_pset()
 *
 * Routine to determine where a thread should be enqueued based on its
 * recommendation if this is the first runnable thread in the clutch_bucket
 * or its clutch bucket's hierarchy membership.
 */
static processor_set_t
sched_clutch_hierarchy_thread_pset(thread_t thread)
{
    if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread) == false) {
        return (recommended_pset_type(thread) == PSET_AMP_P) ? pcore_set : ecore_set;
    }

    sched_clutch_t clutch = sched_clutch_for_thread(thread);
    sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
    sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed);
    if (scb_root) {
        /* Clutch bucket is already runnable, return the pset hierarchy it's part of */
        return scb_root->scr_pset;
    }
    return (recommended_pset_type(thread) == PSET_AMP_E) ? ecore_set : pcore_set;
}

/*
 * sched_clutch_thread_pset_recommended()
 *
 * Routine to determine if the thread should be placed on the provided pset.
 * The routine first makes sure the cluster is available for scheduling. If
 * it is available, it looks at the thread's recommendation. Called
 * with the pset lock held.
 */
static bool
sched_clutch_thread_pset_recommended(thread_t thread, processor_set_t pset)
{
    if (!sched_clutch_pset_available(pset)) {
        return false;
    }

    /* At this point, all clusters should be available and recommended */
    if (sched_clutch_hierarchy_thread_pset(thread) != pset) {
        return false;
    }

    return true;
}

static void
sched_clutch_amp_processor_queue_shutdown(processor_t processor)
{
    processor_set_t pset = processor->processor_set;
    sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
    thread_t thread;
    queue_head_t tqueue;

    /* We only need to migrate threads if this is the last active or last recommended processor in the pset */
    if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) {
        pset_unlock(pset);
        return;
    }

    queue_init(&tqueue);
    while (sched_clutch_root_count(pset_clutch_root) > 0) {
        thread = sched_clutch_thread_highest(pset_clutch_root);
        enqueue_tail(&tqueue, &thread->runq_links);
    }

    pset_unlock(pset);

    qe_foreach_element_safe(thread, &tqueue, runq_links) {
        remqueue(&thread->runq_links);
        thread_lock(thread);
        thread_setrun(thread, SCHED_TAILQ);
        thread_unlock(thread);
    }
}

static thread_t
sched_clutch_amp_steal_thread(processor_set_t pset)
{
    thread_t thread = THREAD_NULL;
    processor_set_t nset = pset;

    if (pcore_set->online_processor_count == 0) {
        /* Nothing to steal from */
        goto out;
    }

    if (pset->pset_cluster_type == PSET_AMP_P) {
        /* P cores don't steal from E cores */
        goto out;
    }

    processor_t processor = current_processor();
    assert(pset == processor->processor_set);

    bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id);
    bit_clear(pset->pending_spill_cpu_mask, processor->cpu_id);

    nset = pcore_set;

    assert(nset != pset);

    if (sched_get_pset_load_average(nset) >= sched_amp_steal_threshold(nset, spill_pending)) {
        pset_unlock(pset);

        pset = nset;

        pset_lock(pset);

        /* Allow steal if load average still OK, no idle cores, and more threads on runq than active cores DISPATCHING */
        if ((sched_get_pset_load_average(pset) >= sched_amp_steal_threshold(pset, spill_pending)) &&
            ((int)sched_clutch_root_count(&pset->pset_clutch_root) > bit_count(pset->cpu_state_map[PROCESSOR_DISPATCHING])) &&
            (bit_count(pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) == 0)) {
            thread = sched_clutch_thread_highest(&pset->pset_clutch_root);
            KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_STEAL) | DBG_FUNC_NONE, spill_pending, 0, 0, 0);
            sched_update_pset_load_average(pset);
        }
    }

out:
    pset_unlock(pset);
    return thread;
}

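/*
 * Note the double check above: the P-cluster load average is sampled once
 * before and once after the lock handoff from the E-cluster pset to the
 * P-cluster pset, since the load can change while the locks are being
 * exchanged. The steal only proceeds if the load is still above the
 * threshold, no recommended P-core is idle, and there are more runnable
 * threads than cores already in the DISPATCHING state.
 */
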
/* Return true if this thread should not continue running on this processor */
static bool
sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread)
{
    if (processor->processor_set->pset_cluster_type == PSET_AMP_E) {
        if (sched_clutch_thread_pset_recommended(thread, pcore_set)) {
            return true;
        }
    } else if (processor->processor_set->pset_cluster_type == PSET_AMP_P) {
        if (!sched_clutch_thread_pset_recommended(thread, pcore_set)) {
            return true;
        }
    }

    return false;
}

static processor_t
sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread)
{
    /* Bound threads don't call this function */
    assert(thread->bound_processor == PROCESSOR_NULL);

    processor_set_t nset;
    processor_t chosen_processor = PROCESSOR_NULL;

select_pset:
    nset = (pset == ecore_set) ? pcore_set : ecore_set;
    if (!sched_clutch_pset_available(pset)) {
        /* If the current pset is not available for scheduling, just use the other pset */
        pset_unlock(pset);
        pset_lock(nset);
        goto select_processor;
    }

    /* Check if the thread is recommended to run on this pset */
    if (sched_clutch_thread_pset_recommended(thread, pset)) {
        nset = pset;
        goto select_processor;
    } else {
        /* pset not recommended; try the other pset */
        pset_unlock(pset);
        pset_lock(nset);
        pset = nset;
        goto select_pset;
    }

select_processor:
    if (!sched_clutch_pset_available(nset)) {
        /*
         * It looks like both psets are not available due to some
         * reason. In that case, just use the master processor's
         * pset for scheduling.
         */
        if (master_processor->processor_set != nset) {
            pset_unlock(nset);
            nset = master_processor->processor_set;
            pset_lock(nset);
        }
    }
    chosen_processor = choose_processor(nset, processor, thread);
    assert(chosen_processor->processor_set == nset);
    return chosen_processor;
}

/*
 * AMP Clutch Scheduler Thread Migration
 *
 * For the AMP version of the clutch scheduler the thread is always scheduled via its
 * thread group. So it is important to make sure that the thread group is part of the
 * correct processor set hierarchy. In order to do that, the clutch scheduler moves
 * all eligible clutch buckets to the correct hierarchy when the recommendation of a
 * thread group is changed by CLPC.
 */

/*
 * sched_clutch_recommended_pset()
 *
 * Routine to decide which hierarchy the thread group should be in based on the
 * recommendation and other thread group and system properties. This routine is
 * used to determine if thread group migration is necessary and should mimic the
 * logic in sched_clutch_thread_pset_recommended() & recommended_pset_type().
 */
static processor_set_t
sched_clutch_recommended_pset(sched_clutch_t sched_clutch, cluster_type_t recommendation)
{
    if (!sched_clutch_pset_available(pcore_set)) {
        return ecore_set;
    }

    if (!sched_clutch_pset_available(ecore_set)) {
        return pcore_set;
    }

    /*
     * If all clusters are available and recommended, use the recommendation
     * to decide which cluster to use.
     */
    pset_cluster_type_t type = thread_group_pset_recommendation(sched_clutch->sc_tg, recommendation);
    return (type == PSET_AMP_E) ? ecore_set : pcore_set;
}

static void
sched_clutch_bucket_threads_drain(sched_clutch_bucket_t clutch_bucket, sched_clutch_root_t root_clutch, queue_t clutch_threads)
{
    uint16_t thread_count = clutch_bucket->scb_thr_count;
    thread_t thread;

    uint64_t current_timestamp = mach_approximate_time();
    while (thread_count > 0) {
        thread = run_queue_peek(&clutch_bucket->scb_runq);
        sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_NONE);
        enqueue_tail(clutch_threads, &thread->runq_links);
        thread_count--;
    }

    /*
     * This operation should have drained the clutch bucket and pulled it out of the
     * hierarchy.
     */
    assert(clutch_bucket->scb_thr_count == 0);
    assert(clutch_bucket->scb_root == NULL);
}

/*
 * sched_clutch_migrate_thread_group()
 *
 * Routine to implement the migration of threads when the thread group
 * recommendation is updated. The migration works using a 2-phase
 * algorithm.
 *
 * Phase 1: With the source pset (determined by sched_clutch_recommended_pset)
 * locked, drain all the runnable threads into a local queue and update the TG
 * recommendation.
 *
 * Phase 2: Call thread_setrun() on all the drained threads. Since the TG recommendation
 * has been updated, these should all end up in the right hierarchy.
 */
static void
sched_clutch_migrate_thread_group(sched_clutch_t sched_clutch, cluster_type_t new_recommendation)
{
    thread_t thread;

    /* If the thread group is empty, just update the recommendation */
    if (os_atomic_load(&sched_clutch->sc_thr_count, relaxed) == 0) {
        thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation);
        return;
    }

    processor_set_t dst_pset = sched_clutch_recommended_pset(sched_clutch, new_recommendation);
    processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set;

    queue_head_t clutch_threads;
    queue_init(&clutch_threads);

    /* Interrupts need to be disabled to make sure threads won't become runnable during the
     * migration and attempt to grab the pset/thread locks.
     */
    spl_t s = splsched();

    pset_lock(src_pset);
    for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SCHED_MAX; bucket++) {
        sched_clutch_bucket_t clutch_bucket = &(sched_clutch->sc_clutch_buckets[bucket]);
        sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed);
        if ((scb_root == NULL) || (scb_root->scr_pset == dst_pset)) {
            /* Clutch bucket not runnable or already in the right hierarchy; nothing to do here */
            continue;
        }
        assert(scb_root->scr_pset == src_pset);
        /* Now remove all the threads from the runq so that thread->runq is set correctly */
        sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads);
    }

    /*
     * Now that all the clutch buckets have been drained, update the TG recommendation.
     * This operation needs to be done with the pset lock held to make sure that anyone
     * coming in before the migration started would get the original pset as the root
     * of this sched_clutch and attempt to hold the src_pset lock. Once the TG changes,
     * all threads that are becoming runnable would find the clutch bucket empty and
     * the TG recommendation would coax them to enqueue it in the new recommended
     * hierarchy. This effectively synchronizes with other threads calling
     * thread_setrun() and trying to decide which pset the thread/clutch_bucket
     * should be enqueued in.
     */
    thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation);
    pset_unlock(src_pset);

    /* Now setrun all the threads in the local queue */
    qe_foreach_element_safe(thread, &clutch_threads, runq_links) {
        remqueue(&thread->runq_links);
        thread_lock(thread);
        thread_setrun(thread, SCHED_TAILQ);
        thread_unlock(thread);
    }

    splx(s);
}

static void
sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation)
{
    /*
     * For the clutch scheduler, the change in recommendation moves the thread group
     * to the right hierarchy. sched_clutch_migrate_thread_group() is also responsible
     * for updating the recommendation of the thread group.
     */
    sched_clutch_migrate_thread_group(&tg->tg_sched_clutch, new_recommendation);

    if (new_recommendation != CLUSTER_TYPE_P) {
        return;
    }

    sched_amp_bounce_thread_group_from_ecores(ecore_set, tg);
}

/*
 * sched_clutch_migrate_foreign_buckets()
 *
 * Routine to migrate all the clutch buckets which are not in their recommended
 * pset hierarchy now that a new pset has become runnable. The algorithm is
 * similar to sched_clutch_migrate_thread_group().
 *
 * Invoked with the newly recommended pset lock held and interrupts disabled.
 */
static void
sched_clutch_migrate_foreign_buckets(__unused processor_t processor, processor_set_t dst_pset, boolean_t drop_lock)
{
    thread_t thread;
    processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set;

    if (!sched_clutch_pset_available(dst_pset)) {
        /*
         * It is possible that some state about the pset changed,
         * but it's still not available for scheduling. Nothing to
         * do here in that case.
         */
        if (drop_lock) {
            pset_unlock(dst_pset);
        }
        return;
    }
    pset_unlock(dst_pset);

    queue_head_t clutch_threads;
    queue_init(&clutch_threads);
    sched_clutch_root_t src_root = &src_pset->pset_clutch_root;

    pset_lock(src_pset);
    queue_t clutch_bucket_list = &src_pset->pset_clutch_root.scr_foreign_buckets;

    if (sched_clutch_root_count(src_root) == 0) {
        /* No threads present in this hierarchy */
        pset_unlock(src_pset);
        goto migration_complete;
    }

    sched_clutch_bucket_t clutch_bucket;
    qe_foreach_element_safe(clutch_bucket, clutch_bucket_list, scb_foreignlink) {
        sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed);
        assert(scb_root->scr_pset == src_pset);
        /* Now remove all the threads from the runq so that thread->runq is set correctly */
        sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads);
        assert(clutch_bucket->scb_foreign == false);
    }
    pset_unlock(src_pset);

    /* Now setrun all the threads in the local queue */
    qe_foreach_element_safe(thread, &clutch_threads, runq_links) {
        remqueue(&thread->runq_links);
        thread_lock(thread);
        thread_setrun(thread, SCHED_TAILQ);
        thread_unlock(thread);
    }

migration_complete:
    if (!drop_lock) {
        pset_lock(dst_pset);
    }
}

3038 #endif /* __AMP__ */
3040 #endif /* CONFIG_SCHED_CLUTCH */