/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_clutch.h>
#include <machine/atomic.h>
#include <sys/kdebug.h>


#if CONFIG_SCHED_CLUTCH

/* Forward declarations of static routines */

/* Root level hierarchy management */
static void sched_clutch_root_init(sched_clutch_root_t, processor_set_t);
static void sched_clutch_root_bucket_init(sched_clutch_root_bucket_t, sched_bucket_t);
static void sched_clutch_root_pri_update(sched_clutch_root_t);
static sched_clutch_root_bucket_t sched_clutch_root_highest_root_bucket(sched_clutch_root_t, uint64_t);
static void sched_clutch_root_urgency_inc(sched_clutch_root_t, thread_t);
static void sched_clutch_root_urgency_dec(sched_clutch_root_t, thread_t);

/* Root bucket level hierarchy management */
static uint64_t sched_clutch_root_bucket_deadline_calculate(sched_clutch_root_bucket_t, uint64_t);
static void sched_clutch_root_bucket_deadline_update(sched_clutch_root_bucket_t, sched_clutch_root_t, uint64_t);
static int sched_clutch_root_bucket_pri_compare(sched_clutch_root_bucket_t, sched_clutch_root_bucket_t);

/* Clutch bucket level hierarchy management */
static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);

static void sched_clutch_bucket_cpu_usage_update(sched_clutch_bucket_t, uint64_t);
static void sched_clutch_bucket_cpu_blocked_update(sched_clutch_bucket_t, uint64_t);
static uint8_t sched_clutch_bucket_pri_calculate(sched_clutch_bucket_t, uint64_t);
static sched_clutch_bucket_t sched_clutch_root_bucket_highest_clutch_bucket(sched_clutch_root_bucket_t);

/* Clutch timeshare properties updates */
static uint32_t sched_clutch_run_bucket_incr(sched_clutch_t, sched_bucket_t);
static uint32_t sched_clutch_run_bucket_decr(sched_clutch_t, sched_bucket_t);
static void sched_clutch_bucket_cpu_adjust(sched_clutch_bucket_t);
static void sched_clutch_bucket_timeshare_update(sched_clutch_bucket_t);
static boolean_t sched_thread_sched_pri_promoted(thread_t);

/* Clutch membership management */
static boolean_t sched_clutch_thread_insert(sched_clutch_root_t, thread_t, integer_t);
static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t);
static thread_t sched_clutch_thread_highest(sched_clutch_root_t);

/* Clutch properties updates */
static uint32_t sched_clutch_root_urgency(sched_clutch_root_t);
static uint32_t sched_clutch_root_count_sum(sched_clutch_root_t);
static int sched_clutch_root_priority(sched_clutch_root_t);


/* Helper debugging routines */
static inline void sched_clutch_hierarchy_locked_assert(sched_clutch_root_t);


/*
 * Global priority queue comparator routine for root buckets. The
 * routine implements the priority queue as a minimum deadline queue
 * to achieve EDF scheduling.
 */
priority_queue_compare_fn_t sched_clutch_root_bucket_compare;


/*
 * Special markers for buckets that have invalid WCELs/quantums etc.
 */
#define SCHED_CLUTCH_INVALID_TIME_32 ((uint32_t)~0)
#define SCHED_CLUTCH_INVALID_TIME_64 ((uint64_t)~0)

/*
 * Root level bucket WCELs
 *
 * The root level bucket selection algorithm is an Earliest Deadline
 * First (EDF) algorithm, where the deadline for a bucket is defined by
 * its worst-case-execution-latency (WCEL) and the timestamp at which
 * the bucket was made runnable.
 */
static uint32_t sched_clutch_root_bucket_wcel_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,   /* FIXPRI */
	0,                              /* FG */
	37500,                          /* IN (37.5ms) */
	75000,                          /* DF (75ms) */
	150000,                         /* UT (150ms) */
	250000                          /* BG (250ms) */
};
static uint64_t sched_clutch_root_bucket_wcel[TH_BUCKET_SCHED_MAX] = {0};
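
/*
 * Worked example using the table above: if the FG and UT buckets become
 * runnable at the same absolute time T, their deadlines are computed as
 * T + WCEL, i.e. T for FG (WCEL of 0) and T + 150ms for UT. Under EDF, FG
 * is selected first; UT becomes the preferred bucket once its deadline is
 * the earliest one remaining (subject to the warps described below).
 */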

/*
 * Root level bucket warp
 *
 * Each root level bucket has a warp value associated with it as well.
 * The warp value allows the root bucket to effectively warp ahead of
 * lower priority buckets for a limited time even if it has a later
 * deadline. The warping behavior provides extra (but limited)
 * opportunity for high priority buckets to remain responsive.
 */

/* Special warp deadline value to indicate that the bucket has not used any warp yet */
#define SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED (SCHED_CLUTCH_INVALID_TIME_64)

/* Warp window durations for various tiers */
static uint32_t sched_clutch_root_bucket_warp_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,   /* FIXPRI */
	8000,                           /* FG (8ms) */
	4000,                           /* IN (4ms) */
	2000,                           /* DF (2ms) */
	1000,                           /* UT (1ms) */
	0                               /* BG (0ms) */
};
static uint64_t sched_clutch_root_bucket_warp[TH_BUCKET_SCHED_MAX] = {0};
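
/*
 * For example: if the BG bucket currently has the earliest deadline but the
 * FG bucket is runnable with warp remaining, FG may be chosen ahead of BG
 * for up to its 8ms warp window. Once that window expires, FG's warp is
 * exhausted and selection falls back to the plain EDF order until FG's warp
 * is replenished (which happens when it is later picked as the EDF bucket).
 */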
157 | ||
158 | /* | |
159 | * Thread level quantum | |
160 | * | |
161 | * The algorithm defines quantums for threads at various buckets. This | |
162 | * (combined with the root level bucket quantums) restricts how much | |
163 | * the lower priority levels can preempt the higher priority threads. | |
164 | */ | |
165 | static uint32_t sched_clutch_thread_quantum_us[TH_BUCKET_SCHED_MAX] = { | |
166 | 10000, /* FIXPRI (10ms) */ | |
167 | 10000, /* FG (10ms) */ | |
168 | 8000, /* IN (8ms) */ | |
169 | 6000, /* DF (6ms) */ | |
170 | 4000, /* UT (4ms) */ | |
171 | 2000 /* BG (2ms) */ | |
172 | }; | |
173 | static uint64_t sched_clutch_thread_quantum[TH_BUCKET_SCHED_MAX] = {0}; | |
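
/*
 * Illustrative example (assuming the usual quantum-expiry re-evaluation of
 * the hierarchy): when the EDF/warp logic picks the BG root bucket, a BG
 * thread runs with a 2ms quantum, so a higher priority bucket that becomes
 * runnable in the meantime is delayed by roughly one short quantum rather
 * than a full FG-sized 10ms quantum (barring earlier preemption).
 */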

enum sched_clutch_state {
	SCHED_CLUTCH_STATE_EMPTY = 0,
	SCHED_CLUTCH_STATE_RUNNABLE,
};

/*
 * sched_clutch_us_to_abstime()
 *
 * Initializer for converting all durations in usec to abstime
 */
static void
sched_clutch_us_to_abstime(uint32_t *us_vals, uint64_t *abstime_vals)
{
	for (int i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		if (us_vals[i] == SCHED_CLUTCH_INVALID_TIME_32) {
			abstime_vals[i] = SCHED_CLUTCH_INVALID_TIME_64;
		} else {
			clock_interval_to_absolutetime_interval(us_vals[i],
			    NSEC_PER_USEC, &abstime_vals[i]);
		}
	}
}
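
/*
 * The abstime tables above are expected to be populated once during scheduler
 * init (outside this excerpt), along these lines:
 *
 *   sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel);
 *   sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp);
 *   sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum);
 */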

#if DEVELOPMENT || DEBUG

/*
 * sched_clutch_hierarchy_locked_assert()
 *
 * Debugging helper routine. Asserts that the hierarchy is locked. The locking
 * for the hierarchy depends on where the hierarchy is hooked. The current
 * implementation hooks the hierarchy at the pset, so the hierarchy is locked
 * using the pset lock.
 */
static inline void
sched_clutch_hierarchy_locked_assert(
	sched_clutch_root_t root_clutch)
{
	pset_assert_locked(root_clutch->scr_pset);
}

#else /* DEVELOPMENT || DEBUG */

static inline void
sched_clutch_hierarchy_locked_assert(
	__unused sched_clutch_root_t root_clutch)
{
}

#endif /* DEVELOPMENT || DEBUG */

/*
 * sched_clutch_thr_count_inc()
 *
 * Increment thread count at a hierarchy level with overflow checks.
 */
static void
sched_clutch_thr_count_inc(
	uint16_t *thr_count)
{
	if (__improbable(os_inc_overflow(thr_count))) {
		panic("sched_clutch thread count overflowed!");
	}
}

/*
 * sched_clutch_thr_count_dec()
 *
 * Decrement thread count at a hierarchy level with underflow checks.
 */
static void
sched_clutch_thr_count_dec(
	uint16_t *thr_count)
{
	if (__improbable(os_dec_overflow(thr_count))) {
		panic("sched_clutch thread count underflowed!");
	}
}


/*
 * sched_clutch_root_init()
 *
 * Routine to initialize the scheduler hierarchy root.
 */
static void
sched_clutch_root_init(
	sched_clutch_root_t root_clutch,
	processor_set_t pset)
{
	root_clutch->scr_thr_count = 0;
	root_clutch->scr_priority = NOPRI;
	root_clutch->scr_urgency = 0;
	root_clutch->scr_pset = pset;

	/* Initialize the queue which maintains all runnable clutch_buckets for timesharing purposes */
	queue_init(&root_clutch->scr_clutch_buckets);

	/* Initialize the queue which maintains all runnable foreign clutch buckets */
	queue_init(&root_clutch->scr_foreign_buckets);

	/* Initialize the bitmap and priority queue of runnable root buckets */
	sched_clutch_root_bucket_compare = priority_heap_make_comparator(a, b, struct sched_clutch_root_bucket, scrb_pqlink, {
		return (a->scrb_deadline < b->scrb_deadline) ? 1 : ((a->scrb_deadline == b->scrb_deadline) ? 0 : -1);
	});
	priority_queue_init(&root_clutch->scr_root_buckets, PRIORITY_QUEUE_GENERIC_KEY | PRIORITY_QUEUE_MIN_HEAP);
	bitmap_zero(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX);
	bitmap_zero(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);

	/* Initialize all the root buckets */
	for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		sched_clutch_root_bucket_init(&root_clutch->scr_buckets[i], i);
	}
}

/*
 * sched_clutch_root_bucket_init()
 *
 * Routine to initialize root buckets.
 */
static void
sched_clutch_root_bucket_init(
	sched_clutch_root_bucket_t root_bucket,
	sched_bucket_t bucket)
{
	root_bucket->scrb_bucket = bucket;
	priority_queue_init(&root_bucket->scrb_clutch_buckets, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
	priority_queue_entry_init(&root_bucket->scrb_pqlink);
	root_bucket->scrb_deadline = SCHED_CLUTCH_INVALID_TIME_64;
	root_bucket->scrb_warped_deadline = 0;
	root_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[root_bucket->scrb_bucket];
}

/*
 * sched_clutch_root_bucket_pri_compare()
 *
 * Routine to compare root buckets based on the highest runnable clutch
 * bucket priorities in the root buckets.
 */
static int
sched_clutch_root_bucket_pri_compare(
	sched_clutch_root_bucket_t a,
	sched_clutch_root_bucket_t b)
{
	sched_clutch_bucket_t a_highest = sched_clutch_root_bucket_highest_clutch_bucket(a);
	sched_clutch_bucket_t b_highest = sched_clutch_root_bucket_highest_clutch_bucket(b);
	return (a_highest->scb_priority > b_highest->scb_priority) ?
	       1 : ((a_highest->scb_priority == b_highest->scb_priority) ? 0 : -1);
}

/*
 * sched_clutch_root_select_aboveui()
 *
 * Special case scheduling for the AboveUI bucket.
 *
 * AboveUI threads are typically system critical threads that need low latency,
 * which is why they are handled specially.
 *
 * Since the priority ranges for the AboveUI and FG timeshare buckets overlap, it is
 * important to maintain some native priority order between those buckets. The policy
 * implemented here is to compare the highest clutch buckets of both buckets; if the
 * AboveUI bucket is higher, schedule it immediately. Otherwise fall through to the
 * deadline based scheduling, which should pick up the timeshare buckets.
 *
 * The implementation allows extremely low latency CPU access for AboveUI threads
 * while supporting the use case of high priority timeshare threads contending with
 * lower priority fixed priority threads.
 */
static boolean_t
sched_clutch_root_select_aboveui(
	sched_clutch_root_t root_clutch)
{
	if (bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_FIXPRI)) {
		sched_clutch_root_bucket_t root_bucket_aboveui = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
		sched_clutch_root_bucket_t root_bucket_sharefg = &root_clutch->scr_buckets[TH_BUCKET_SHARE_FG];

		if (!bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_SHARE_FG)) {
			/* If the timeshare FG bucket is not runnable, pick the aboveUI bucket for scheduling */
			return true;
		}
		if (sched_clutch_root_bucket_pri_compare(root_bucket_aboveui, root_bucket_sharefg) >= 0) {
			/* If the aboveUI bucket has a higher native clutch bucket priority, schedule it */
			return true;
		}
	}
	return false;
}
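
/*
 * Illustrative example: if the highest AboveUI clutch bucket has priority 55
 * and the highest FG timeshare clutch bucket has priority 60, the comparison
 * above returns -1 and the routine falls through to EDF selection, letting
 * the FG bucket win. If the AboveUI clutch bucket priority were 60 or higher,
 * the AboveUI bucket would be scheduled immediately.
 */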


/*
 * sched_clutch_root_highest_root_bucket()
 *
 * Main routine to find the highest runnable root level bucket.
 * This routine is called from performance sensitive contexts, so it is
 * crucial to keep it O(1).
 */
static sched_clutch_root_bucket_t
sched_clutch_root_highest_root_bucket(
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		return NULL;
	}

	if (sched_clutch_root_select_aboveui(root_clutch)) {
		return &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	}

	/*
	 * The AboveUI bucket is not runnable or has a low priority clutch bucket; use the earliest
	 * deadline model to schedule threads. The idea is that as the timeshare buckets use CPU, they
	 * will drop their interactivity score and allow low priority AboveUI clutch buckets to be scheduled.
	 */

	/* Find the earliest deadline bucket */
	sched_clutch_root_bucket_t edf_bucket = priority_queue_min(&root_clutch->scr_root_buckets, struct sched_clutch_root_bucket, scrb_pqlink);

	sched_clutch_root_bucket_t warp_bucket = NULL;
	int warp_bucket_index = -1;
evaluate_warp_buckets:
	/* Check if any higher runnable buckets have warp available */
	warp_bucket_index = bitmap_lsb_first(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);

	if ((warp_bucket_index == -1) || (warp_bucket_index >= edf_bucket->scrb_bucket)) {
		/* No higher buckets have warp available; choose the edf bucket and replenish its warp */
		sched_clutch_root_bucket_deadline_update(edf_bucket, root_clutch, timestamp);
		edf_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[edf_bucket->scrb_bucket];
		edf_bucket->scrb_warped_deadline = SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED;
		bitmap_set(root_clutch->scr_warp_available, edf_bucket->scrb_bucket);
		return edf_bucket;
	}

	/*
	 * Looks like there is a root bucket which is higher in the natural priority
	 * order than edf_bucket and might have some warp remaining.
	 */
	warp_bucket = &root_clutch->scr_buckets[warp_bucket_index];
	if (warp_bucket->scrb_warped_deadline == SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/* Root bucket has not used any of its warp; set a deadline to expire its warp and return it */
		warp_bucket->scrb_warped_deadline = timestamp + warp_bucket->scrb_warp_remaining;
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}
	if (warp_bucket->scrb_warped_deadline > timestamp) {
		/* Root bucket already has a warp window open with some warp remaining */
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}

	/*
	 * For this bucket, the warp window was opened sometime in the past but has now
	 * expired. Mark the bucket as not available for warp anymore and re-run the
	 * warp bucket selection logic.
	 */
	warp_bucket->scrb_warp_remaining = 0;
	bitmap_clear(root_clutch->scr_warp_available, warp_bucket->scrb_bucket);
	goto evaluate_warp_buckets;
}
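
/*
 * Walk-through example: suppose the DF bucket holds the earliest deadline and
 * the IN bucket is runnable with warp remaining. The warp scan finds IN (a
 * higher bucket than DF in natural priority order), opens a 4ms warp window
 * for it and returns it. On subsequent selections within those 4ms, IN keeps
 * winning via the open warp window. Once the window expires, IN's warp is
 * cleared and the loop re-runs, eventually falling back to the EDF choice
 * (DF), which has its own warp replenished on selection.
 */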

/*
 * sched_clutch_root_bucket_deadline_calculate()
 *
 * Calculate the deadline for the bucket based on its WCEL
 */
static uint64_t
sched_clutch_root_bucket_deadline_calculate(
	sched_clutch_root_bucket_t root_bucket,
	uint64_t timestamp)
{
	/* For fixpri AboveUI bucket always return it as the earliest deadline */
	if (root_bucket->scrb_bucket < TH_BUCKET_SHARE_FG) {
		return 0;
	}

	/* For all timeshare buckets set the deadline as current time + worst-case-execution-latency */
	return timestamp + sched_clutch_root_bucket_wcel[root_bucket->scrb_bucket];
}

/*
 * sched_clutch_root_bucket_deadline_update()
 *
 * Routine to update the deadline of the root bucket when it is selected.
 * Updating the deadline also moves the root_bucket in the EDF priority
 * queue.
 */
static void
sched_clutch_root_bucket_deadline_update(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* The algorithm never uses the deadlines for scheduling TH_BUCKET_FIXPRI bucket */
		return;
	}
	uint64_t old_deadline = root_bucket->scrb_deadline;
	uint64_t new_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	assert(old_deadline <= new_deadline);
	if (old_deadline != new_deadline) {
		root_bucket->scrb_deadline = new_deadline;
		/* Since the priority queue is a min-heap, use the decrease routine even though the deadline has a larger value now */
		priority_queue_entry_decrease(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);
	}
}

/*
 * sched_clutch_root_bucket_runnable()
 *
 * Routine to insert a newly runnable root bucket into the hierarchy.
 * Also updates the deadline and warp parameters as necessary.
 */
static void
sched_clutch_root_bucket_runnable(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	/* Mark the root bucket as runnable */
	bitmap_set(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, 0, 0, 0);

	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */
		return;
	}

	root_bucket->scrb_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	priority_queue_insert(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);

	if (root_bucket->scrb_warp_remaining) {
		/* Since the bucket has some warp remaining and it is now runnable, mark it as available for warp */
		bitmap_set(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	}
}

/*
 * sched_clutch_root_bucket_empty()
 *
 * Routine to remove an empty root bucket from the hierarchy.
 * Also updates the deadline and warp parameters as necessary.
 */
static void
sched_clutch_root_bucket_empty(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	bitmap_clear(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0, 0);

	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */
		return;
	}

	priority_queue_remove(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, sched_clutch_root_bucket_compare);

	bitmap_clear(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	if (root_bucket->scrb_warped_deadline > timestamp) {
		/*
		 * For root buckets that were using the warp, check if the warp
		 * deadline is in the future. If yes, remove the wall time the
		 * warp was active and update the warp remaining. This allows
		 * the root bucket to use the remaining warp the next time it
		 * becomes runnable.
		 */
		root_bucket->scrb_warp_remaining = root_bucket->scrb_warped_deadline - timestamp;
	} else if (root_bucket->scrb_warped_deadline != SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/*
		 * If the root bucket's warped deadline is in the past, it has used up
		 * all the warp it was assigned. Empty out its warp remaining.
		 */
		root_bucket->scrb_warp_remaining = 0;
	}
}

/*
 * sched_clutch_root_pri_update()
 *
 * The root level priority is used for thread selection and preemption
 * logic.
 */
static void
sched_clutch_root_pri_update(
	sched_clutch_root_t root_clutch)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		/* No runnable root buckets */
		root_clutch->scr_priority = NOPRI;
		assert(root_clutch->scr_urgency == 0);
		return;
	}
	sched_clutch_root_bucket_t root_bucket = NULL;
	/* Special case for AboveUI (uses same logic as thread selection) */
	if (sched_clutch_root_select_aboveui(root_clutch)) {
		root_bucket = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	} else {
		/*
		 * AboveUI bucket is not runnable or has a low clutch bucket priority,
		 * select the next runnable root bucket in natural priority order. This logic
		 * is slightly different from thread selection, because thread selection
		 * considers deadlines, warps etc. to decide the most optimal bucket at a
		 * given timestamp. Since the priority value is used for preemption decisions
		 * only, it needs to be based on the highest runnable thread available in
		 * the timeshare domain.
		 */
		int root_bucket_index = bitmap_lsb_next(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX, TH_BUCKET_FIXPRI);
		assert(root_bucket_index != -1);
		root_bucket = &root_clutch->scr_buckets[root_bucket_index];
	}
	/* For the selected root bucket, find the highest priority clutch bucket */
	sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket);
	root_clutch->scr_priority = priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);
}

/*
 * sched_clutch_root_urgency_inc()
 *
 * Routine to increment the urgency at the root level based on the thread
 * priority that is being inserted into the hierarchy. The root urgency
 * counter is updated based on the urgency of threads in any of the
 * clutch buckets which are part of the hierarchy.
 *
 * Always called with the pset lock held.
 */
static void
sched_clutch_root_urgency_inc(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
		root_clutch->scr_urgency++;
	}
}

/*
 * sched_clutch_root_urgency_dec()
 *
 * Routine to decrement the urgency at the root level based on the thread
 * priority that is being removed from the hierarchy. The root urgency
 * counter is updated based on the urgency of threads in any of the
 * clutch buckets which are part of the hierarchy.
 *
 * Always called with the pset lock held.
 */
static void
sched_clutch_root_urgency_dec(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
		root_clutch->scr_urgency--;
	}
}

/*
 * Clutch bucket level scheduling
 *
 * The second level of scheduling is the clutch bucket level scheduling
 * which tries to schedule thread groups within root_buckets. Each
 * clutch represents a thread group and a clutch_bucket represents
 * threads at a particular sched_bucket within that thread group. The
 * goal of this level of scheduling is to allow interactive thread
 * groups low latency access to the CPU. It also provides slight
 * scheduling preference for App and unrestricted thread groups.
 *
 * The clutch bucket scheduling algorithm measures an interactivity
 * score for all clutch buckets. The interactivity score is based
 * on the ratio of the CPU used and the voluntary blocking of threads
 * within the clutch bucket. The algorithm is very close to the ULE
 * scheduler on FreeBSD in terms of calculations. The interactivity
 * score provides an interactivity boost in the range of
 * [0:SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI * 2] which allows interactive
 * thread groups to win over CPU spinners.
 */
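
/*
 * For example: with the default SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI of 8, a
 * clutch bucket whose threads block far more than they run earns a boost
 * close to 16, while a clutch bucket full of CPU spinners earns a boost close
 * to 0. The boosted priority is what orders clutch buckets within their root
 * bucket, so an interactive thread group can jump ahead of a CPU-bound one
 * even within the same QoS bucket.
 */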

/* Priority boost range for interactivity */
#define SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT (8)
uint8_t sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT;

/* Window to scale the cpu usage and blocked values (currently 500ms). It is the threshold of used + blocked */
uint64_t sched_clutch_bucket_adjust_threshold = 0;
#define SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS (500000)

/* The ratio to scale the cpu/blocked time per window */
#define SCHED_CLUTCH_BUCKET_ADJUST_RATIO (10)

/* Rate at which the interactivity score is recalculated. This keeps the score smooth in terms of extremely bursty behavior */
uint64_t sched_clutch_bucket_interactivity_delta = 0;
#define SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT (25000)

/*
 * In order to allow App thread groups some preference over daemon thread
 * groups, the App clutch_buckets get an 8 point boost. The boost value should
 * be chosen such that badly behaved apps are still penalized over well
 * behaved interactive daemon clutch_buckets.
 */
#define SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT (8)
uint8_t sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT;

/* Initial value for voluntary blocking time for the clutch_bucket */
#define SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID (uint32_t)(~0)

/*
 * sched_clutch_bucket_init()
 *
 * Initializer for clutch buckets.
 */
static void
sched_clutch_bucket_init(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	bzero(clutch_bucket, sizeof(struct sched_clutch_bucket));

	clutch_bucket->scb_bucket = bucket;
	/* scb_priority will be recalculated when a thread is inserted in the clutch bucket */
	clutch_bucket->scb_priority = 0;
	/*
	 * All thread groups should be initialized to be interactive; this allows the newly launched
	 * thread groups to fairly compete with already running thread groups.
	 */
	clutch_bucket->scb_interactivity_score = (sched_clutch_bucket_interactive_pri * 2);
	clutch_bucket->scb_foreign = false;

	os_atomic_store(&clutch_bucket->scb_timeshare_tick, 0, relaxed);
	os_atomic_store(&clutch_bucket->scb_pri_shift, INT8_MAX, relaxed);

	clutch_bucket->scb_interactivity_ts = 0;
	clutch_bucket->scb_blocked_ts = SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID;
	priority_queue_entry_init(&clutch_bucket->scb_pqlink);
	clutch_bucket->scb_clutch = clutch;
	clutch_bucket->scb_root = NULL;
	priority_queue_init(&clutch_bucket->scb_clutchpri_prioq, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
	run_queue_init(&clutch_bucket->scb_runq);
}

/*
 * sched_clutch_init_with_thread_group()
 *
 * Initialize the sched_clutch when the thread group is being created
 */
void
sched_clutch_init_with_thread_group(
	sched_clutch_t clutch,
	struct thread_group *tg)
{
	os_atomic_store(&clutch->sc_thr_count, 0, relaxed);

	/* Initialize all the clutch buckets */
	for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		sched_clutch_bucket_init(&(clutch->sc_clutch_buckets[i]), clutch, i);
	}

	/* Grouping specific fields */
	clutch->sc_tg = tg;
	os_atomic_store(&clutch->sc_tg_priority, 0, relaxed);
}

/*
 * sched_clutch_destroy()
 *
 * Destructor for clutch; called from thread group release code.
 */
void
sched_clutch_destroy(
	__unused sched_clutch_t clutch)
{
	assert(os_atomic_load(&clutch->sc_thr_count, relaxed) == 0);
}


/*
 * sched_clutch_bucket_hierarchy_insert()
 *
 * Routine to insert a newly runnable clutch_bucket into the root hierarchy.
 */
static void
sched_clutch_bucket_hierarchy_insert(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		/* Enqueue the timeshare clutch buckets into the global runnable clutch_bucket list; used for sched tick operations */
		enqueue_tail(&root_clutch->scr_clutch_buckets, &clutch_bucket->scb_listlink);
	}
	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];

	/* If this is the first clutch bucket in the root bucket, insert the root bucket into the root priority queue */
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_runnable(root_bucket, root_clutch, timestamp);
	}

	/* Insert the clutch bucket into the root bucket priority queue */
	priority_queue_insert(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	os_atomic_store(&clutch_bucket->scb_root, root_clutch, relaxed);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, clutch_bucket->scb_priority, 0);
}

/*
 * sched_clutch_bucket_hierarchy_remove()
 *
 * Routine to remove an empty clutch bucket from the root hierarchy.
 */
static void
sched_clutch_bucket_hierarchy_remove(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		/* Remove the timeshare clutch bucket from the globally runnable clutch_bucket list */
		remqueue(&clutch_bucket->scb_listlink);
	}

	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];

	/* Remove the clutch bucket from the root bucket priority queue */
	priority_queue_remove(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	os_atomic_store(&clutch_bucket->scb_root, NULL, relaxed);
	clutch_bucket->scb_blocked_ts = timestamp;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0);

	/* If the root bucket priority queue is now empty, remove it from the root priority queue */
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_empty(root_bucket, root_clutch, timestamp);
	}
}

/*
 * sched_clutch_bucket_base_pri()
 *
 * Calculates the "base" priority of the clutch bucket. The base
 * priority of the clutch bucket is the max of the highest base_pri
 * and the highest sched_pri in the clutch bucket, plus any grouping
 * specific (App/Daemon...) boost applicable to the clutch_bucket.
 */
static uint8_t
sched_clutch_bucket_base_pri(
	sched_clutch_bucket_t clutch_bucket)
{
	uint8_t clutch_boost = 0;
	assert(clutch_bucket->scb_runq.count != 0);

	sched_clutch_t clutch = clutch_bucket->scb_clutch;

	/*
	 * Since the clutch bucket can contain threads that are members of the group due
	 * to the sched_pri being promoted or due to their base pri, the base priority of
	 * the entire clutch bucket should be based on the highest thread (promoted or base)
	 * in the clutch bucket.
	 */
	uint8_t max_pri = priority_queue_empty(&clutch_bucket->scb_clutchpri_prioq) ? 0 : priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);

	/*
	 * For all AboveUI clutch buckets and clutch buckets for thread groups that
	 * haven't been specified as SCHED_CLUTCH_TG_PRI_LOW, give a priority boost
	 */
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (os_atomic_load(&clutch->sc_tg_priority, relaxed) != SCHED_CLUTCH_TG_PRI_LOW)) {
		clutch_boost = sched_clutch_bucket_pri_boost;
	}
	return max_pri + clutch_boost;
}
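
/*
 * For example: a clutch bucket whose highest runnable thread has priority 31
 * and whose thread group is not marked SCHED_CLUTCH_TG_PRI_LOW gets a base
 * priority of 31 + 8 = 39. The same bucket in a SCHED_CLUTCH_TG_PRI_LOW
 * thread group (e.g. a daemon group) would stay at 31.
 */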

/*
 * sched_clutch_bucket_interactivity_score_calculate()
 *
 * Routine to calculate the interactivity score for the clutch bucket. The
 * interactivity score is based on the ratio of CPU used by all threads in
 * the bucket and the blocked time of the bucket as a whole.
 */
static uint8_t
sched_clutch_bucket_interactivity_score_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		/*
		 * Since the root bucket selection algorithm for Above UI looks at clutch bucket
		 * priorities, make sure all AboveUI buckets are marked interactive.
		 */
		assert(clutch_bucket->scb_interactivity_score == (2 * sched_clutch_bucket_interactive_pri));
		return clutch_bucket->scb_interactivity_score;
	}

	if (clutch_bucket->scb_interactivity_ts == 0) {
		/*
		 * This indicates a newly initialized clutch bucket; return the default interactivity score
		 * and update timestamp.
		 */
		clutch_bucket->scb_interactivity_ts = timestamp;
		return clutch_bucket->scb_interactivity_score;
	}

	if (timestamp < (clutch_bucket->scb_interactivity_ts + sched_clutch_bucket_interactivity_delta)) {
		return clutch_bucket->scb_interactivity_score;
	}

	/* Check if the clutch bucket accounting needs to be scaled */
	sched_clutch_bucket_cpu_adjust(clutch_bucket);
	clutch_bucket->scb_interactivity_ts = timestamp;

	sched_clutch_bucket_cpu_data_t scb_cpu_data;
	scb_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
	clutch_cpu_data_t cpu_used = scb_cpu_data.cpu_data.scbcd_cpu_used;
	clutch_cpu_data_t cpu_blocked = scb_cpu_data.cpu_data.scbcd_cpu_blocked;

	/*
	 * In extremely CPU contended cases, it is possible that the clutch bucket has been runnable
	 * for a long time but none of its threads have been picked up for execution. In that case, both
	 * the CPU used and blocked would be 0.
	 */
	if ((cpu_blocked == 0) && (cpu_used == 0)) {
		return clutch_bucket->scb_interactivity_score;
	}

	/*
	 * For all timeshare buckets, calculate the interactivity score of the bucket
	 * and add it to the base priority
	 */
	uint8_t interactive_score = 0;
	if (cpu_blocked > cpu_used) {
		/* Interactive clutch_bucket case */
		interactive_score = sched_clutch_bucket_interactive_pri +
		    ((sched_clutch_bucket_interactive_pri * (cpu_blocked - cpu_used)) / cpu_blocked);
	} else {
		/* Non-interactive clutch_bucket case */
		interactive_score = ((sched_clutch_bucket_interactive_pri * cpu_blocked) / cpu_used);
	}
	clutch_bucket->scb_interactivity_score = interactive_score;
	return interactive_score;
}
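
/*
 * Worked example with the default interactive pri of 8: a clutch bucket that
 * blocked for 300 units and used 100 units of CPU is interactive, so its
 * score is 8 + (8 * (300 - 100)) / 300 = 13. A bucket that used 400 units
 * and blocked for 100 is not, so its score is (8 * 100) / 400 = 2.
 */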

/*
 * sched_clutch_bucket_pri_calculate()
 *
 * The priority calculation algorithm for the clutch_bucket is a slight
 * modification on the ULE interactivity score. It uses the base priority
 * of the clutch bucket and applies an interactivity score boost to the
 * highly responsive clutch buckets.
 */
static uint8_t
sched_clutch_bucket_pri_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	/* For empty clutch buckets, return priority 0 */
	if (clutch_bucket->scb_thr_count == 0) {
		return 0;
	}

	uint8_t base_pri = sched_clutch_bucket_base_pri(clutch_bucket);
	uint8_t interactive_score = sched_clutch_bucket_interactivity_score_calculate(clutch_bucket, timestamp);

	assert(((uint64_t)base_pri + interactive_score) <= UINT8_MAX);
	uint8_t pri = base_pri + interactive_score;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_PRI) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, pri, interactive_score, 0);
	return pri;
}
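
/*
 * Continuing the examples above: a clutch bucket with base priority 39 and an
 * interactivity score of 13 ends up at clutch bucket priority 52, while a
 * CPU-bound bucket at base 39 with a score of 2 lands at 41. This combined
 * value is what orders clutch buckets inside their root bucket.
 */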

/*
 * sched_clutch_root_bucket_highest_clutch_bucket()
 *
 * Routine to find the highest priority clutch bucket
 * within the root bucket.
 */
static sched_clutch_bucket_t
sched_clutch_root_bucket_highest_clutch_bucket(
	sched_clutch_root_bucket_t root_bucket)
{
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		return NULL;
	}
	return priority_queue_max(&root_bucket->scrb_clutch_buckets, struct sched_clutch_bucket, scb_pqlink);
}

/*
 * sched_clutch_bucket_runnable()
 *
 * Perform all operations needed when a new clutch bucket becomes runnable.
 * It involves inserting the clutch_bucket into the hierarchy and updating the
 * root priority appropriately.
 */
static boolean_t
sched_clutch_bucket_runnable(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_bucket_cpu_blocked_update(clutch_bucket, timestamp);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
	/* Update the timesharing properties of this clutch_bucket; also done every sched_tick */
	sched_clutch_bucket_timeshare_update(clutch_bucket);
	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}

/*
 * sched_clutch_bucket_update()
 *
 * Update the clutch_bucket's position in the hierarchy based on whether
 * the newly runnable thread changes its priority. Also update the root
 * priority accordingly.
 */
static boolean_t
sched_clutch_bucket_update(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	uint64_t new_pri = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	if (new_pri == clutch_bucket->scb_priority) {
		return false;
	}
	struct priority_queue *bucket_prioq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;

	if (new_pri < clutch_bucket->scb_priority) {
		clutch_bucket->scb_priority = new_pri;
		priority_queue_entry_decrease(bucket_prioq, &clutch_bucket->scb_pqlink,
		    clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	} else {
		clutch_bucket->scb_priority = new_pri;
		priority_queue_entry_increase(bucket_prioq, &clutch_bucket->scb_pqlink,
		    clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	}

	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}

/*
 * sched_clutch_bucket_empty()
 *
 * Perform all the operations needed when a clutch_bucket is no longer runnable.
 * It involves removing the clutch bucket from the hierarchy and updating the root
 * priority appropriately.
 */
static void
sched_clutch_bucket_empty(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_root_pri_update(root_clutch);
}

/*
 * sched_clutch_cpu_usage_update()
 *
 * Routine to update CPU usage of the thread in the hierarchy.
 */
void
sched_clutch_cpu_usage_update(
	thread_t thread,
	uint64_t delta)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
	sched_clutch_bucket_cpu_usage_update(clutch_bucket, delta);
}

/*
 * sched_clutch_bucket_cpu_usage_update()
 *
 * Routine to update the CPU usage of the clutch_bucket.
 */
static void
sched_clutch_bucket_cpu_usage_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t delta)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the Above UI bucket always has the maximum interactivity score, nothing to do here */
		return;
	}

	/*
	 * The CPU usage should not overflow the clutch_cpu_data_t type. Since the usage is used to
	 * calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX.
	 */
	delta = MIN(delta, CLUTCH_CPU_DATA_MAX);
	os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_used), (clutch_cpu_data_t)delta, relaxed);
}

/*
 * sched_clutch_bucket_cpu_blocked_update()
 *
 * Routine to update CPU blocked time for clutch_bucket.
 */
static void
sched_clutch_bucket_cpu_blocked_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (clutch_bucket->scb_blocked_ts == SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID)) {
		/* For the Above UI bucket and a newly initialized clutch bucket, nothing to do here */
		return;
	}

	uint64_t blocked_time = timestamp - clutch_bucket->scb_blocked_ts;
	if (blocked_time > sched_clutch_bucket_adjust_threshold) {
		blocked_time = sched_clutch_bucket_adjust_threshold;
	}

	/*
	 * The CPU blocked time should not overflow the clutch_cpu_data_t type. Since the blocked time
	 * is used to calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX.
	 */
	blocked_time = MIN(blocked_time, CLUTCH_CPU_DATA_MAX);
	clutch_cpu_data_t __assert_only cpu_blocked_orig = os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_blocked), (clutch_cpu_data_t)blocked_time, relaxed);
	/* The blocked time is scaled every so often, so it should never overflow */
	assert(blocked_time <= (CLUTCH_CPU_DATA_MAX - cpu_blocked_orig));
}

/*
 * sched_clutch_bucket_cpu_adjust()
 *
 * Routine to scale the cpu usage and blocked time once the sum gets bigger
 * than sched_clutch_bucket_adjust_threshold. Allows the values to remain
 * manageable and maintain the same ratio while allowing clutch buckets to
 * adjust behavior and reflect in the interactivity score in a reasonable
 * amount of time.
 */
static void
sched_clutch_bucket_cpu_adjust(
	sched_clutch_bucket_t clutch_bucket)
{
	sched_clutch_bucket_cpu_data_t old_cpu_data = {};
	sched_clutch_bucket_cpu_data_t new_cpu_data = {};
	do {
		old_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
		clutch_cpu_data_t cpu_used = old_cpu_data.cpu_data.scbcd_cpu_used;
		clutch_cpu_data_t cpu_blocked = old_cpu_data.cpu_data.scbcd_cpu_blocked;
		if ((cpu_used + cpu_blocked) < sched_clutch_bucket_adjust_threshold) {
			return;
		}

		/*
		 * The accumulation of CPU used and blocked is past the threshold; scale it
		 * down to lose old history.
		 */
		new_cpu_data.cpu_data.scbcd_cpu_used = cpu_used / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
		new_cpu_data.cpu_data.scbcd_cpu_blocked = cpu_blocked / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
	} while (!os_atomic_cmpxchg(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, old_cpu_data.scbcd_cpu_data_packed, new_cpu_data.scbcd_cpu_data_packed, relaxed));
}
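
/*
 * For example: once used + blocked crosses the 500ms threshold, a bucket with
 * 150ms used and 350ms blocked is scaled down to 15ms used and 35ms blocked.
 * The 3:7 used-to-blocked ratio (and therefore the interactivity score) is
 * preserved, but newer samples now dominate the history.
 */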

/*
 * Thread level scheduling algorithm
 *
 * The thread level scheduling algorithm uses the mach timeshare
 * decay based algorithm to achieve sharing between threads within the
 * same clutch bucket. The load/priority shifts etc. are all maintained
 * at the clutch bucket level and used for decay calculation of the
 * threads. The load sampling is still driven off the scheduler tick
 * for runnable clutch buckets (it does not use the new higher frequency
 * EWMA based load calculation). The idea is that the contention and load
 * within clutch_buckets should be limited enough to not see heavy decay
 * and timeshare effectively.
 */

/*
 * sched_clutch_thread_run_bucket_incr() / sched_clutch_run_bucket_incr()
 *
 * Increment the run count for the clutch bucket associated with the
 * thread.
 */
uint32_t
sched_clutch_thread_run_bucket_incr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return 0;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	return sched_clutch_run_bucket_incr(clutch, bucket);
}

static uint32_t
sched_clutch_run_bucket_incr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]);
	uint32_t result = os_atomic_inc(&(clutch_bucket->scb_run_count), relaxed);
	return result;
}

/*
 * sched_clutch_thread_run_bucket_decr() / sched_clutch_run_bucket_decr()
 *
 * Decrement the run count for the clutch bucket associated with the
 * thread.
 */
uint32_t
sched_clutch_thread_run_bucket_decr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return 0;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	return sched_clutch_run_bucket_decr(clutch, bucket);
}

static uint32_t
sched_clutch_run_bucket_decr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]);
	uint32_t result = os_atomic_dec(&(clutch_bucket->scb_run_count), relaxed);
	return result;
}

1221 | /* | |
1222 | * sched_clutch_bucket_timeshare_update() | |
1223 | * | |
1224 | * Routine to update the load and priority shift for the clutch_bucket every | |
1225 | * sched_tick. For runnable clutch_buckets, the sched tick handling code | |
1226 | * iterates the clutch buckets and calls this routine. For all others, the | |
1227 | * clutch_bucket maintains a "last updated schedtick" parameter. As threads | |
1228 | * become runnable in the clutch bucket, if this value is outdated, the load | |
1229 | * and shifts are updated. | |
1230 | * | |
1231 | * Possible optimization: | |
1232 | * - The current algorithm samples the load every sched tick (125ms). | |
1233 | * This is prone to spikes in runnable counts; if that turns out to be | |
1234 | * a problem, a simple solution would be to do the EWMA trick to sample | |
1235 | * load at every load_tick (30ms) and use the averaged value for the pri | |
1236 | * shift calculation. | |
1237 | */ | |
1238 | static void | |
1239 | sched_clutch_bucket_timeshare_update( | |
1240 | sched_clutch_bucket_t clutch_bucket) | |
1241 | { | |
1242 | if (clutch_bucket->scb_bucket < TH_BUCKET_SHARE_FG) { | |
1243 | return; | |
1244 | } | |
1245 | ||
1246 | /* | |
1247 | * Update the timeshare parameters for the clutch bucket if they haven't been updated | |
1248 | * in this tick. | |
1249 | */ | |
1250 | uint32_t bucket_sched_ts = os_atomic_load(&clutch_bucket->scb_timeshare_tick, relaxed); | |
1251 | uint32_t current_sched_ts = sched_tick; | |
1252 | if (bucket_sched_ts != current_sched_ts) { | |
1253 | os_atomic_store(&clutch_bucket->scb_timeshare_tick, current_sched_ts, relaxed); | |
1254 | uint32_t bucket_load = (os_atomic_load(&clutch_bucket->scb_run_count, relaxed) / processor_avail_count); | |
1255 | bucket_load = MIN(bucket_load, NRQS - 1); | |
1256 | uint32_t pri_shift = sched_fixed_shift - sched_load_shifts[bucket_load]; | |
1257 | os_atomic_store(&clutch_bucket->scb_pri_shift, pri_shift, relaxed); | |
1258 | } | |
1259 | } | |
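/*
 * Sketch of the "possible optimization" mentioned above (illustrative only,
 * not wired into the scheduler): sample the per-bucket load more frequently
 * and fold it into a simple exponentially weighted moving average before the
 * sched_load_shifts[] lookup. The names example_* and EXAMPLE_EWMA_WEIGHT are
 * hypothetical; everything else mirrors the routine above.
 */
#if 0   /* illustration only */
#define EXAMPLE_EWMA_WEIGHT 4   /* new_avg = (3 * old_avg + sample) / 4 */

static void
example_clutch_bucket_load_ewma_update(
	sched_clutch_bucket_t clutch_bucket,
	uint32_t *example_load_avg)
{
	/* Instantaneous per-CPU load of this clutch bucket */
	uint32_t sample = os_atomic_load(&clutch_bucket->scb_run_count, relaxed) / processor_avail_count;

	/* EWMA smooths out spikes in the runnable counts between samples */
	uint32_t new_avg = ((*example_load_avg * (EXAMPLE_EWMA_WEIGHT - 1)) + sample) / EXAMPLE_EWMA_WEIGHT;
	*example_load_avg = new_avg;

	/* Same pri_shift derivation as sched_clutch_bucket_timeshare_update() */
	uint32_t bucket_load = MIN(new_avg, NRQS - 1);
	os_atomic_store(&clutch_bucket->scb_pri_shift,
	    sched_fixed_shift - sched_load_shifts[bucket_load], relaxed);
}
#endif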
1260 | ||
1261 | /* | |
1262 | * sched_clutch_thread_clutch_update() | |
1263 | * | |
1264 | * Routine called when the thread changes its thread group. The current | |
1265 | * implementation relies on the fact that the thread group is changed only | |
1266 | * from the context of the thread itself. Due to this fact, the thread | |
1267 | * group change causes only counter updates in the old & new clutch | |
1268 | * buckets and no hierarchy changes. The routine also attributes the CPU | |
1269 | * used so far to the old clutch. | |
1270 | */ | |
1271 | void | |
1272 | sched_clutch_thread_clutch_update( | |
1273 | thread_t thread, | |
1274 | sched_clutch_t old_clutch, | |
1275 | sched_clutch_t new_clutch) | |
1276 | { | |
1277 | uint32_t cpu_delta; | |
1278 | assert(current_thread() == thread); | |
1279 | ||
1280 | if (old_clutch) { | |
1281 | sched_clutch_run_bucket_decr(old_clutch, thread->th_sched_bucket); | |
1282 | /* | |
1283 | * Calculate the CPU used by this thread in the old bucket and | |
1284 | * add it to the old clutch bucket. This uses the same CPU usage | |
1285 | * logic as update_priority etc. | |
1286 | */ | |
1287 | thread_timer_delta(thread, cpu_delta); | |
1288 | if (thread->pri_shift < INT8_MAX) { | |
1289 | thread->sched_usage += cpu_delta; | |
1290 | } | |
1291 | thread->cpu_delta += cpu_delta; | |
1292 | sched_clutch_bucket_cpu_usage_update(&(old_clutch->sc_clutch_buckets[thread->th_sched_bucket]), cpu_delta); | |
1293 | } | |
1294 | ||
1295 | if (new_clutch) { | |
1296 | sched_clutch_run_bucket_incr(new_clutch, thread->th_sched_bucket); | |
1297 | } | |
1298 | } | |
1299 | ||
1300 | /* Thread Insertion/Removal/Selection routines */ | |
1301 | ||
1302 | /* | |
1303 | * sched_clutch_thread_insert() | |
1304 | * | |
1305 | * Routine to insert a thread into the sched clutch hierarchy. | |
1306 | * Update the counts at all levels of the hierarchy and insert the nodes | |
1307 | * as they become runnable. Always called with the pset lock held. | |
1308 | */ | |
1309 | static boolean_t | |
1310 | sched_clutch_thread_insert( | |
1311 | sched_clutch_root_t root_clutch, | |
1312 | thread_t thread, | |
1313 | integer_t options) | |
1314 | { | |
1315 | boolean_t result = FALSE; | |
1316 | ||
1317 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1318 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1319 | assert(thread->thread_group == clutch->sc_tg); | |
1320 | ||
1321 | uint64_t current_timestamp = mach_absolute_time(); | |
1322 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
1323 | assert((clutch_bucket->scb_root == NULL) || (clutch_bucket->scb_root == root_clutch)); | |
1324 | ||
1325 | /* Insert thread into the clutch_bucket runq using sched_pri */ | |
1326 | run_queue_enqueue(&clutch_bucket->scb_runq, thread, options); | |
1327 | /* Increment the urgency counter for the root if necessary */ | |
1328 | sched_clutch_root_urgency_inc(root_clutch, thread); | |
1329 | ||
1330 | /* Insert thread into clutch_bucket priority queue based on the promoted or base priority */ | |
1331 | priority_queue_insert(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link, | |
1332 | sched_thread_sched_pri_promoted(thread) ? thread->sched_pri : thread->base_pri, | |
1333 | PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE); | |
1334 | os_atomic_inc(&clutch->sc_thr_count, relaxed); | |
1335 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE, | |
1336 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_RUNNABLE, 0); | |
1337 | ||
1338 | /* Enqueue the clutch into the hierarchy (if needed) and update properties */ | |
1339 | if (clutch_bucket->scb_thr_count == 0) { | |
1340 | sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count); | |
1341 | sched_clutch_thr_count_inc(&root_clutch->scr_thr_count); | |
1342 | /* Insert the newly runnable clutch bucket into the hierarchy */ | |
1343 | result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp); | |
1344 | } else { | |
1345 | sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count); | |
1346 | sched_clutch_thr_count_inc(&root_clutch->scr_thr_count); | |
1347 | /* Update the position of the clutch bucket in the hierarchy */ | |
1348 | result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp); | |
1349 | } | |
1350 | return result; | |
1351 | } | |
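/*
 * A runnable thread is linked into two structures above: scb_runq (ordered by
 * sched_pri and used for the actual pick in sched_clutch_thread_highest()) and
 * scb_clutchpri_prioq (ordered by the base or promoted priority and used when
 * the clutch bucket's own priority within the hierarchy is derived).
 */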
1352 | ||
1353 | /* | |
1354 | * sched_clutch_thread_remove() | |
1355 | * | |
1356 | * Routine to remove a thread from the sched clutch hierarchy. | |
1357 | * Update the counts at all levels of the hierarchy and remove the nodes | |
1358 | * as they become empty. Always called with the pset lock held. | |
1359 | */ | |
1360 | static void | |
1361 | sched_clutch_thread_remove( | |
1362 | sched_clutch_root_t root_clutch, | |
1363 | thread_t thread, | |
1364 | uint64_t current_timestamp) | |
1365 | { | |
1366 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1367 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1368 | assert(thread->thread_group == clutch->sc_tg); | |
1369 | assert(thread->runq != PROCESSOR_NULL); | |
1370 | ||
1371 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
1372 | assert(clutch_bucket->scb_root == root_clutch); | |
1373 | ||
1374 | /* Decrement the urgency counter for the root if necessary */ | |
1375 | sched_clutch_root_urgency_dec(root_clutch, thread); | |
1376 | /* Remove thread from the clutch_bucket */ | |
1377 | run_queue_remove(&clutch_bucket->scb_runq, thread); | |
1378 | ||
1379 | priority_queue_remove(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link, | |
1380 | PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE); | |
1381 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE, | |
1382 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_EMPTY, 0); | |
1383 | ||
1384 | /* Update counts at various levels of the hierarchy */ | |
1385 | os_atomic_dec(&clutch->sc_thr_count, relaxed); | |
1386 | sched_clutch_thr_count_dec(&root_clutch->scr_thr_count); | |
1387 | sched_clutch_thr_count_dec(&clutch_bucket->scb_thr_count); | |
1388 | ||
1389 | /* Remove the clutch from hierarchy (if needed) and update properties */ | |
1390 | if (clutch_bucket->scb_thr_count == 0) { | |
1391 | sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp); | |
1392 | } else { | |
1393 | sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp); | |
1394 | } | |
1395 | } | |
1396 | ||
1397 | /* | |
1398 | * sched_clutch_thread_highest() | |
1399 | * | |
1400 | * Routine to find and remove the highest priority thread | |
1401 | * from the sched clutch hierarchy. The algorithm looks at the | |
1402 | * hierarchy for the most eligible runnable thread and calls | |
1403 | * sched_clutch_thread_remove(). Always called with the | |
1404 | * pset lock held. | |
1405 | */ | |
1406 | static thread_t | |
1407 | sched_clutch_thread_highest( | |
1408 | sched_clutch_root_t root_clutch) | |
1409 | { | |
1410 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1411 | uint64_t current_timestamp = mach_absolute_time(); | |
1412 | ||
1413 | /* Select the highest priority root bucket */ | |
1414 | sched_clutch_root_bucket_t root_bucket = sched_clutch_root_highest_root_bucket(root_clutch, current_timestamp); | |
1415 | if (root_bucket == NULL) { | |
1416 | return THREAD_NULL; | |
1417 | } | |
1418 | /* Since a thread is being picked from this root bucket, update its deadline */ | |
1419 | sched_clutch_root_bucket_deadline_update(root_bucket, root_clutch, current_timestamp); | |
1420 | ||
1421 | /* Find the highest priority clutch bucket in this root bucket */ | |
1422 | sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket); | |
1423 | assert(clutch_bucket != NULL); | |
1424 | ||
1425 | /* Find the highest priority runnable thread in this clutch bucket */ | |
1426 | thread_t thread = run_queue_peek(&clutch_bucket->scb_runq); | |
1427 | assert(thread != NULL); | |
1428 | ||
1429 | /* Remove and return the thread from the hierarchy */ | |
1430 | sched_clutch_thread_remove(root_clutch, thread, current_timestamp); | |
1431 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_SELECT) | DBG_FUNC_NONE, | |
1432 | thread_tid(thread), thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, 0, 0); | |
1433 | return thread; | |
1434 | } | |
1435 | ||
1436 | ||
1437 | /* High level global accessor routines */ | |
1438 | ||
1439 | /* | |
1440 | * sched_clutch_root_urgency() | |
1441 | * | |
1442 | * Routine to get the urgency of the highest runnable | |
1443 | * thread in the hierarchy. | |
1444 | */ | |
1445 | static uint32_t | |
1446 | sched_clutch_root_urgency( | |
1447 | sched_clutch_root_t root_clutch) | |
1448 | { | |
1449 | return root_clutch->scr_urgency; | |
1450 | } | |
1451 | ||
1452 | /* | |
1453 | * sched_clutch_root_count_sum() | |
1454 | * | |
1455 | * The count_sum mechanism is used for scheduler runq | |
1456 | * statistics calculation. It is only useful for debugging | |
1457 | * purposes; since it requires a mach_absolute_time() call on | |
1458 | * other scheduler implementations, it is better to avoid | |
1459 | * populating this until absolutely necessary. | |
1460 | */ | |
1461 | static uint32_t | |
1462 | sched_clutch_root_count_sum( | |
1463 | __unused sched_clutch_root_t root_clutch) | |
1464 | { | |
1465 | return 0; | |
1466 | } | |
1467 | ||
1468 | /* | |
1469 | * sched_clutch_root_priority() | |
1470 | * | |
1471 | * Routine to get the priority of the highest runnable | |
1472 | * thread in the hierarchy. | |
1473 | */ | |
1474 | static int | |
1475 | sched_clutch_root_priority( | |
1476 | sched_clutch_root_t root_clutch) | |
1477 | { | |
1478 | return root_clutch->scr_priority; | |
1479 | } | |
1480 | ||
1481 | /* | |
1482 | * sched_clutch_root_count() | |
1483 | * | |
1484 | * Returns total number of runnable threads in the hierarchy. | |
1485 | */ | |
1486 | uint32_t | |
1487 | sched_clutch_root_count( | |
1488 | sched_clutch_root_t root_clutch) | |
1489 | { | |
1490 | return root_clutch->scr_thr_count; | |
1491 | } | |
1492 | ||
1493 | /* | |
1494 | * sched_clutch_thread_pri_shift() | |
1495 | * | |
1496 | * Routine to get the priority shift value for a thread. | |
1497 | * Since the timesharing is done at the clutch_bucket level, | |
1498 | * this routine gets the clutch_bucket and retrieves the | |
1499 | * values from there. | |
1500 | */ | |
1501 | uint32_t | |
1502 | sched_clutch_thread_pri_shift( | |
1503 | thread_t thread, | |
1504 | sched_bucket_t bucket) | |
1505 | { | |
1506 | if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1507 | return UINT8_MAX; | |
1508 | } | |
1509 | assert(bucket != TH_BUCKET_RUN); | |
1510 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1511 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]); | |
1512 | return os_atomic_load(&clutch_bucket->scb_pri_shift, relaxed); | |
1513 | } | |
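/*
 * Usage note (illustrative): the value returned here is cached on the thread
 * when its bucket changes, e.g.
 *
 *	thread->pri_shift = sched_clutch_thread_pri_shift(thread, new_bucket);
 *
 * as done in sched_clutch_update_thread_bucket() below. Returning UINT8_MAX
 * for ineligible threads effectively disables the usage accumulation guarded
 * by the (pri_shift < INT8_MAX) check in sched_clutch_thread_clutch_update().
 */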
1514 | ||
1515 | #pragma mark -- Clutch Scheduler Algorithm | |
1516 | ||
1517 | static void | |
1518 | sched_clutch_init(void); | |
1519 | ||
1520 | static void | |
1521 | sched_clutch_timebase_init(void); | |
1522 | ||
1523 | static thread_t | |
1524 | sched_clutch_steal_thread(processor_set_t pset); | |
1525 | ||
1526 | static void | |
1527 | sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context); | |
1528 | ||
1529 | static boolean_t | |
1530 | sched_clutch_processor_enqueue(processor_t processor, thread_t thread, | |
1531 | sched_options_t options); | |
1532 | ||
1533 | static boolean_t | |
1534 | sched_clutch_processor_queue_remove(processor_t processor, thread_t thread); | |
1535 | ||
1536 | static ast_t | |
1537 | sched_clutch_processor_csw_check(processor_t processor); | |
1538 | ||
1539 | static boolean_t | |
1540 | sched_clutch_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte); | |
1541 | ||
1542 | static int | |
1543 | sched_clutch_runq_count(processor_t processor); | |
1544 | ||
1545 | static boolean_t | |
1546 | sched_clutch_processor_queue_empty(processor_t processor); | |
1547 | ||
1548 | static uint64_t | |
1549 | sched_clutch_runq_stats_count_sum(processor_t processor); | |
1550 | ||
1551 | static int | |
1552 | sched_clutch_processor_bound_count(processor_t processor); | |
1553 | ||
1554 | static void | |
1555 | sched_clutch_pset_init(processor_set_t pset); | |
1556 | ||
1557 | static void | |
1558 | sched_clutch_processor_init(processor_t processor); | |
1559 | ||
1560 | static thread_t | |
1561 | sched_clutch_choose_thread(processor_t processor, int priority, ast_t reason); | |
1562 | ||
1563 | static void | |
1564 | sched_clutch_processor_queue_shutdown(processor_t processor); | |
1565 | ||
1566 | static sched_mode_t | |
1567 | sched_clutch_initial_thread_sched_mode(task_t parent_task); | |
1568 | ||
1569 | static uint32_t | |
1570 | sched_clutch_initial_quantum_size(thread_t thread); | |
1571 | ||
1572 | static bool | |
1573 | sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread); | |
1574 | ||
1575 | static uint32_t | |
1576 | sched_clutch_run_incr(thread_t thread); | |
1577 | ||
1578 | static uint32_t | |
1579 | sched_clutch_run_decr(thread_t thread); | |
1580 | ||
1581 | static void | |
1582 | sched_clutch_update_thread_bucket(thread_t thread); | |
1583 | ||
1584 | const struct sched_dispatch_table sched_clutch_dispatch = { | |
1585 | .sched_name = "clutch", | |
1586 | .init = sched_clutch_init, | |
1587 | .timebase_init = sched_clutch_timebase_init, | |
1588 | .processor_init = sched_clutch_processor_init, | |
1589 | .pset_init = sched_clutch_pset_init, | |
1590 | .maintenance_continuation = sched_timeshare_maintenance_continue, | |
1591 | .choose_thread = sched_clutch_choose_thread, | |
1592 | .steal_thread_enabled = sched_steal_thread_enabled, | |
1593 | .steal_thread = sched_clutch_steal_thread, | |
1594 | .compute_timeshare_priority = sched_compute_timeshare_priority, | |
1595 | .choose_processor = choose_processor, | |
1596 | .processor_enqueue = sched_clutch_processor_enqueue, | |
1597 | .processor_queue_shutdown = sched_clutch_processor_queue_shutdown, | |
1598 | .processor_queue_remove = sched_clutch_processor_queue_remove, | |
1599 | .processor_queue_empty = sched_clutch_processor_queue_empty, | |
1600 | .priority_is_urgent = priority_is_urgent, | |
1601 | .processor_csw_check = sched_clutch_processor_csw_check, | |
1602 | .processor_queue_has_priority = sched_clutch_processor_queue_has_priority, | |
1603 | .initial_quantum_size = sched_clutch_initial_quantum_size, | |
1604 | .initial_thread_sched_mode = sched_clutch_initial_thread_sched_mode, | |
1605 | .can_update_priority = can_update_priority, | |
1606 | .update_priority = update_priority, | |
1607 | .lightweight_update_priority = lightweight_update_priority, | |
1608 | .quantum_expire = sched_default_quantum_expire, | |
1609 | .processor_runq_count = sched_clutch_runq_count, | |
1610 | .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum, | |
1611 | .processor_bound_count = sched_clutch_processor_bound_count, | |
1612 | .thread_update_scan = sched_clutch_thread_update_scan, | |
1613 | .multiple_psets_enabled = TRUE, | |
1614 | .sched_groups_enabled = FALSE, | |
1615 | .avoid_processor_enabled = TRUE, | |
1616 | .thread_avoid_processor = sched_clutch_thread_avoid_processor, | |
1617 | .processor_balance = sched_SMT_balance, | |
1618 | ||
1619 | .rt_runq = sched_rtglobal_runq, | |
1620 | .rt_init = sched_rtglobal_init, | |
1621 | .rt_queue_shutdown = sched_rtglobal_queue_shutdown, | |
1622 | .rt_runq_scan = sched_rtglobal_runq_scan, | |
1623 | .rt_runq_count_sum = sched_rtglobal_runq_count_sum, | |
1624 | ||
1625 | .qos_max_parallelism = sched_qos_max_parallelism, | |
1626 | .check_spill = sched_check_spill, | |
1627 | .ipi_policy = sched_ipi_policy, | |
1628 | .thread_should_yield = sched_thread_should_yield, | |
1629 | .run_count_incr = sched_clutch_run_incr, | |
1630 | .run_count_decr = sched_clutch_run_decr, | |
1631 | .update_thread_bucket = sched_clutch_update_thread_bucket, | |
1632 | .pset_made_schedulable = sched_pset_made_schedulable, | |
1633 | }; | |
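/*
 * Usage sketch (illustrative; the actual glue lives in the generic scheduler
 * code, not in this file): the rest of the kernel reaches these routines
 * through the SCHED() dispatch indirection, so selecting the clutch scheduler
 * makes this table the policy for thread selection, enqueue and preemption
 * checks. The argument values below are placeholders.
 */
#if 0   /* illustration only */
	thread_t picked = SCHED(choose_thread)(processor, MINPRI, AST_NONE);
	ast_t preempt = SCHED(processor_csw_check)(processor);
#endif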
1634 | ||
1635 | __attribute__((always_inline)) | |
1636 | static inline run_queue_t | |
1637 | sched_clutch_bound_runq(processor_t processor) | |
1638 | { | |
1639 | return &processor->runq; | |
1640 | } | |
1641 | ||
1642 | __attribute__((always_inline)) | |
1643 | static inline sched_clutch_root_t | |
1644 | sched_clutch_processor_root_clutch(processor_t processor) | |
1645 | { | |
1646 | return &processor->processor_set->pset_clutch_root; | |
1647 | } | |
1648 | ||
1649 | __attribute__((always_inline)) | |
1650 | static inline run_queue_t | |
1651 | sched_clutch_thread_bound_runq(processor_t processor, __assert_only thread_t thread) | |
1652 | { | |
1653 | assert(thread->bound_processor == processor); | |
1654 | return sched_clutch_bound_runq(processor); | |
1655 | } | |
1656 | ||
1657 | static uint32_t | |
1658 | sched_clutch_initial_quantum_size(thread_t thread) | |
1659 | { | |
1660 | if (thread == THREAD_NULL) { | |
1661 | return std_quantum; | |
1662 | } | |
1663 | assert(sched_clutch_thread_quantum[thread->th_sched_bucket] <= UINT32_MAX); | |
1664 | return (uint32_t)sched_clutch_thread_quantum[thread->th_sched_bucket]; | |
1665 | } | |
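/*
 * For example, a thread in one of the lower timeshare buckets receives the
 * (typically shorter) per-bucket quantum configured via
 * sched_clutch_thread_quantum_us[] and converted in
 * sched_clutch_timebase_init() below, while THREAD_NULL falls back to the
 * standard std_quantum.
 */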
1666 | ||
1667 | static sched_mode_t | |
1668 | sched_clutch_initial_thread_sched_mode(task_t parent_task) | |
1669 | { | |
1670 | if (parent_task == kernel_task) { | |
1671 | return TH_MODE_FIXED; | |
1672 | } else { | |
1673 | return TH_MODE_TIMESHARE; | |
1674 | } | |
1675 | } | |
1676 | ||
1677 | static void | |
1678 | sched_clutch_processor_init(processor_t processor) | |
1679 | { | |
1680 | run_queue_init(&processor->runq); | |
1681 | } | |
1682 | ||
1683 | static void | |
1684 | sched_clutch_pset_init(processor_set_t pset) | |
1685 | { | |
1686 | sched_clutch_root_init(&pset->pset_clutch_root, pset); | |
1687 | } | |
1688 | ||
1689 | static void | |
1690 | sched_clutch_init(void) | |
1691 | { | |
1692 | if (!PE_parse_boot_argn("sched_clutch_bucket_interactive_pri", &sched_clutch_bucket_interactive_pri, sizeof(sched_clutch_bucket_interactive_pri))) { | |
1693 | sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT; | |
1694 | } | |
1695 | if (!PE_parse_boot_argn("sched_clutch_bucket_pri_boost", &sched_clutch_bucket_pri_boost, sizeof(sched_clutch_bucket_pri_boost))) { | |
1696 | sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT; | |
1697 | } | |
1698 | sched_timeshare_init(); | |
1699 | } | |
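/*
 * Hypothetical tuning example (values are illustrative, not recommendations):
 * both knobs parsed above can be overridden from the boot command line, e.g.
 *
 *	nvram boot-args="sched_clutch_bucket_interactive_pri=32 sched_clutch_bucket_pri_boost=16"
 *
 * otherwise the SCHED_CLUTCH_BUCKET_*_DEFAULT values are used.
 */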
1700 | ||
1701 | static void | |
1702 | sched_clutch_timebase_init(void) | |
1703 | { | |
1704 | sched_timeshare_timebase_init(); | |
1705 | sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel); | |
1706 | sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp); | |
1707 | sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum); | |
1708 | clock_interval_to_absolutetime_interval(SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS, | |
1709 | NSEC_PER_USEC, &sched_clutch_bucket_adjust_threshold); | |
1710 | ||
1711 | uint32_t interactivity_delta = 0; | |
1712 | if (!PE_parse_boot_argn("sched_clutch_bucket_interactivity_delta_usecs", &interactivity_delta, sizeof(interactivity_delta))) { | |
1713 | interactivity_delta = SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT; | |
1714 | } | |
1715 | clock_interval_to_absolutetime_interval(interactivity_delta, NSEC_PER_USEC, &sched_clutch_bucket_interactivity_delta); | |
1716 | } | |
1717 | ||
1718 | static thread_t | |
1719 | sched_clutch_choose_thread( | |
1720 | processor_t processor, | |
1721 | int priority, | |
1722 | __unused ast_t reason) | |
1723 | { | |
1724 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
1725 | uint32_t clutch_count = sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)); | |
1726 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
1727 | boolean_t choose_from_boundq = false; | |
1728 | ||
1729 | if (bound_runq->highq < priority && | |
1730 | clutch_pri < priority) { | |
1731 | return THREAD_NULL; | |
1732 | } | |
1733 | ||
1734 | if (bound_runq->count && clutch_count) { | |
1735 | if (bound_runq->highq >= clutch_pri) { | |
1736 | choose_from_boundq = true; | |
1737 | } | |
1738 | } else if (bound_runq->count) { | |
1739 | choose_from_boundq = true; | |
1740 | } else if (clutch_count) { | |
1741 | choose_from_boundq = false; | |
1742 | } else { | |
1743 | return THREAD_NULL; | |
1744 | } | |
1745 | ||
1746 | thread_t thread = THREAD_NULL; | |
1747 | if (choose_from_boundq == false) { | |
1748 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1749 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
1750 | } else { | |
1751 | thread = run_queue_dequeue(bound_runq, SCHED_HEADQ); | |
1752 | } | |
1753 | return thread; | |
1754 | } | |
1755 | ||
1756 | static boolean_t | |
1757 | sched_clutch_processor_enqueue( | |
1758 | processor_t processor, | |
1759 | thread_t thread, | |
1760 | sched_options_t options) | |
1761 | { | |
1762 | boolean_t result; | |
1763 | ||
1764 | thread->runq = processor; | |
1765 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1766 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1767 | result = sched_clutch_thread_insert(pset_clutch_root, thread, options); | |
1768 | } else { | |
1769 | run_queue_t rq = sched_clutch_thread_bound_runq(processor, thread); | |
1770 | result = run_queue_enqueue(rq, thread, options); | |
1771 | } | |
1772 | return result; | |
1773 | } | |
1774 | ||
1775 | static boolean_t | |
1776 | sched_clutch_processor_queue_empty(processor_t processor) | |
1777 | { | |
1778 | return sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0 && | |
1779 | sched_clutch_bound_runq(processor)->count == 0; | |
1780 | } | |
1781 | ||
1782 | static ast_t | |
1783 | sched_clutch_processor_csw_check(processor_t processor) | |
1784 | { | |
1785 | boolean_t has_higher; | |
1786 | int pri; | |
1787 | ||
1788 | if (sched_clutch_thread_avoid_processor(processor, current_thread())) { | |
1789 | return AST_PREEMPT | AST_URGENT; | |
1790 | } | |
1791 | ||
1792 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
1793 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
1794 | ||
1795 | assert(processor->active_thread != NULL); | |
1796 | ||
1797 | pri = MAX(clutch_pri, bound_runq->highq); | |
1798 | ||
1799 | if (processor->first_timeslice) { | |
1800 | has_higher = (pri > processor->current_pri); | |
1801 | } else { | |
1802 | has_higher = (pri >= processor->current_pri); | |
1803 | } | |
1804 | ||
1805 | if (has_higher) { | |
1806 | if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) { | |
1807 | return AST_PREEMPT | AST_URGENT; | |
1808 | } | |
1809 | ||
1810 | if (bound_runq->urgency > 0) { | |
1811 | return AST_PREEMPT | AST_URGENT; | |
1812 | } | |
1813 | ||
1814 | return AST_PREEMPT; | |
1815 | } | |
1816 | ||
1817 | return AST_NONE; | |
1818 | } | |
1819 | ||
1820 | static boolean_t | |
1821 | sched_clutch_processor_queue_has_priority(processor_t processor, | |
1822 | int priority, | |
1823 | boolean_t gte) | |
1824 | { | |
1825 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
1826 | ||
1827 | int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq); | |
1828 | ||
1829 | if (gte) { | |
1830 | return qpri >= priority; | |
1831 | } else { | |
1832 | return qpri > priority; | |
1833 | } | |
1834 | } | |
1835 | ||
1836 | static int | |
1837 | sched_clutch_runq_count(processor_t processor) | |
1838 | { | |
1839 | return (int)sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) + sched_clutch_bound_runq(processor)->count; | |
1840 | } | |
1841 | ||
1842 | static uint64_t | |
1843 | sched_clutch_runq_stats_count_sum(processor_t processor) | |
1844 | { | |
1845 | uint64_t bound_sum = sched_clutch_bound_runq(processor)->runq_stats.count_sum; | |
1846 | ||
1847 | if (processor->cpu_id == processor->processor_set->cpu_set_low) { | |
1848 | return bound_sum + sched_clutch_root_count_sum(sched_clutch_processor_root_clutch(processor)); | |
1849 | } else { | |
1850 | return bound_sum; | |
1851 | } | |
1852 | } | |
1853 | static int | |
1854 | sched_clutch_processor_bound_count(processor_t processor) | |
1855 | { | |
1856 | return sched_clutch_bound_runq(processor)->count; | |
1857 | } | |
1858 | ||
1859 | static void | |
1860 | sched_clutch_processor_queue_shutdown(processor_t processor) | |
1861 | { | |
1862 | processor_set_t pset = processor->processor_set; | |
1863 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1864 | thread_t thread; | |
1865 | queue_head_t tqueue; | |
1866 | ||
1867 | /* We only need to migrate threads if this is the last active processor in the pset */ | |
1868 | if (pset->online_processor_count > 0) { | |
1869 | pset_unlock(pset); | |
1870 | return; | |
1871 | } | |
1872 | ||
1873 | queue_init(&tqueue); | |
1874 | while (sched_clutch_root_count(pset_clutch_root) > 0) { | |
1875 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
1876 | enqueue_tail(&tqueue, &thread->runq_links); | |
1877 | } | |
1878 | ||
1879 | pset_unlock(pset); | |
1880 | ||
1881 | qe_foreach_element_safe(thread, &tqueue, runq_links) { | |
1882 | remqueue(&thread->runq_links); | |
1883 | ||
1884 | thread_lock(thread); | |
1885 | ||
1886 | thread_setrun(thread, SCHED_TAILQ); | |
1887 | ||
1888 | thread_unlock(thread); | |
1889 | } | |
1890 | } | |
1891 | ||
1892 | static boolean_t | |
1893 | sched_clutch_processor_queue_remove( | |
1894 | processor_t processor, | |
1895 | thread_t thread) | |
1896 | { | |
1897 | run_queue_t rq; | |
1898 | processor_set_t pset = processor->processor_set; | |
1899 | ||
1900 | pset_lock(pset); | |
1901 | ||
1902 | if (processor == thread->runq) { | |
1903 | /* | |
1904 | * Thread is on a run queue and we have a lock on | |
1905 | * that run queue. | |
1906 | */ | |
1907 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1908 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1909 | sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time()); | |
1910 | } else { | |
1911 | rq = sched_clutch_thread_bound_runq(processor, thread); | |
1912 | run_queue_remove(rq, thread); | |
1913 | } | |
1914 | } else { | |
1915 | /* | |
1916 | * The thread left the run queue before we could | |
1917 | * lock the run queue. | |
1918 | */ | |
1919 | assert(thread->runq == PROCESSOR_NULL); | |
1920 | processor = PROCESSOR_NULL; | |
1921 | } | |
1922 | ||
1923 | pset_unlock(pset); | |
1924 | ||
1925 | return processor != PROCESSOR_NULL; | |
1926 | } | |
1927 | ||
1928 | static thread_t | |
1929 | sched_clutch_steal_thread(processor_set_t pset) | |
1930 | { | |
1931 | processor_set_t nset, cset = pset; | |
1932 | thread_t thread; | |
1933 | ||
1934 | do { | |
1935 | sched_clutch_root_t pset_clutch_root = &cset->pset_clutch_root; | |
1936 | if (sched_clutch_root_count(pset_clutch_root) > 0) { | |
1937 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
1938 | pset_unlock(cset); | |
1939 | return thread; | |
1940 | } | |
1941 | ||
1942 | nset = next_pset(cset); | |
1943 | ||
1944 | if (nset != pset) { | |
1945 | pset_unlock(cset); | |
1946 | ||
1947 | cset = nset; | |
1948 | pset_lock(cset); | |
1949 | } | |
1950 | } while (nset != pset); | |
1951 | ||
1952 | pset_unlock(cset); | |
1953 | ||
1954 | return THREAD_NULL; | |
1955 | } | |
1956 | ||
1957 | static void | |
1958 | sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context) | |
1959 | { | |
1960 | boolean_t restart_needed = FALSE; | |
1961 | processor_t processor = processor_list; | |
1962 | processor_set_t pset; | |
1963 | thread_t thread; | |
1964 | spl_t s; | |
1965 | ||
1966 | /* | |
1967 | * We update the threads associated with each processor (bound and idle threads) | |
1968 | * and then update the threads in each pset runqueue. | |
1969 | */ | |
1970 | ||
1971 | do { | |
1972 | do { | |
1973 | pset = processor->processor_set; | |
1974 | ||
1975 | s = splsched(); | |
1976 | pset_lock(pset); | |
1977 | ||
1978 | restart_needed = runq_scan(sched_clutch_bound_runq(processor), scan_context); | |
1979 | ||
1980 | pset_unlock(pset); | |
1981 | splx(s); | |
1982 | ||
1983 | if (restart_needed) { | |
1984 | break; | |
1985 | } | |
1986 | ||
1987 | thread = processor->idle_thread; | |
1988 | if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { | |
1989 | if (thread_update_add_thread(thread) == FALSE) { | |
1990 | restart_needed = TRUE; | |
1991 | break; | |
1992 | } | |
1993 | } | |
1994 | } while ((processor = processor->processor_list) != NULL); | |
1995 | ||
1996 | /* Ok, we now have a collection of candidates -- fix them. */ | |
1997 | thread_update_process_threads(); | |
1998 | } while (restart_needed); | |
1999 | ||
2000 | pset = &pset0; | |
2001 | ||
2002 | do { | |
2003 | do { | |
2004 | s = splsched(); | |
2005 | pset_lock(pset); | |
2006 | ||
2007 | if (sched_clutch_root_count(&pset->pset_clutch_root) > 0) { | |
2008 | queue_t clutch_bucket_list = &pset->pset_clutch_root.scr_clutch_buckets; | |
2009 | sched_clutch_bucket_t clutch_bucket; | |
2010 | qe_foreach_element(clutch_bucket, clutch_bucket_list, scb_listlink) { | |
2011 | sched_clutch_bucket_timeshare_update(clutch_bucket); | |
2012 | restart_needed = runq_scan(&clutch_bucket->scb_runq, scan_context); | |
2013 | if (restart_needed) { | |
2014 | break; | |
2015 | } | |
2016 | } | |
2017 | } | |
2018 | ||
2019 | pset_unlock(pset); | |
2020 | splx(s); | |
2021 | if (restart_needed) { | |
2022 | break; | |
2023 | } | |
2024 | } while ((pset = pset->pset_list) != NULL); | |
2025 | ||
2026 | /* Ok, we now have a collection of candidates -- fix them. */ | |
2027 | thread_update_process_threads(); | |
2028 | } while (restart_needed); | |
2029 | } | |
2030 | ||
2031 | extern int sched_allow_rt_smt; | |
2032 | ||
2033 | /* Return true if this thread should not continue running on this processor */ | |
2034 | static bool | |
2035 | sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread) | |
2036 | { | |
2037 | if (processor->processor_primary != processor) { | |
2038 | /* | |
2039 | * This is a secondary SMT processor. If the primary is running | |
2040 | * a realtime thread, only allow realtime threads on the secondary. | |
2041 | */ | |
2042 | if ((processor->processor_primary->current_pri >= BASEPRI_RTQUEUES) && ((thread->sched_pri < BASEPRI_RTQUEUES) || !sched_allow_rt_smt)) { | |
2043 | return true; | |
2044 | } | |
2045 | } | |
2046 | ||
2047 | return false; | |
2048 | } | |
2049 | ||
2050 | /* | |
2051 | * For the clutch scheduler, the run counts are maintained in the clutch | |
2052 | * buckets (i.e. the thread group scheduling structure). | |
2053 | */ | |
2054 | static uint32_t | |
2055 | sched_clutch_run_incr(thread_t thread) | |
2056 | { | |
2057 | assert((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN); | |
2058 | uint32_t new_count = os_atomic_inc(&sched_run_buckets[TH_BUCKET_RUN], relaxed); | |
2059 | sched_clutch_thread_run_bucket_incr(thread, thread->th_sched_bucket); | |
2060 | return new_count; | |
2061 | } | |
2062 | ||
2063 | static uint32_t | |
2064 | sched_clutch_run_decr(thread_t thread) | |
2065 | { | |
2066 | assert((thread->state & (TH_RUN | TH_IDLE)) != TH_RUN); | |
2067 | uint32_t new_count = os_atomic_dec(&sched_run_buckets[TH_BUCKET_RUN], relaxed); | |
2068 | sched_clutch_thread_run_bucket_decr(thread, thread->th_sched_bucket); | |
2069 | return new_count; | |
2070 | } | |
2071 | ||
2072 | static sched_bucket_t | |
2073 | sched_convert_pri_to_bucket(uint8_t priority) | |
2074 | { | |
2075 | sched_bucket_t bucket = TH_BUCKET_RUN; | |
2076 | ||
2077 | if (priority > BASEPRI_USER_INITIATED) { | |
2078 | bucket = TH_BUCKET_SHARE_FG; | |
2079 | } else if (priority > BASEPRI_DEFAULT) { | |
2080 | bucket = TH_BUCKET_SHARE_IN; | |
2081 | } else if (priority > BASEPRI_UTILITY) { | |
2082 | bucket = TH_BUCKET_SHARE_DF; | |
2083 | } else if (priority > MAXPRI_THROTTLE) { | |
2084 | bucket = TH_BUCKET_SHARE_UT; | |
2085 | } else { | |
2086 | bucket = TH_BUCKET_SHARE_BG; | |
2087 | } | |
2088 | return bucket; | |
2089 | } | |
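/*
 * Worked example (the numeric values assume the usual osfmk/kern/sched.h
 * definitions: BASEPRI_USER_INITIATED 37, BASEPRI_DEFAULT 31,
 * BASEPRI_UTILITY 20, MAXPRI_THROTTLE 4):
 *
 *	priority 40  ->  TH_BUCKET_SHARE_FG   (> BASEPRI_USER_INITIATED)
 *	priority 37  ->  TH_BUCKET_SHARE_IN   (> BASEPRI_DEFAULT)
 *	priority 31  ->  TH_BUCKET_SHARE_DF   (> BASEPRI_UTILITY)
 *	priority 20  ->  TH_BUCKET_SHARE_UT   (> MAXPRI_THROTTLE)
 *	priority  4  ->  TH_BUCKET_SHARE_BG
 */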
2090 | ||
2091 | /* | |
2092 | * For threads that have changed sched_pri without changing the | |
2093 | * base_pri for any reason other than decay, use the sched_pri | |
2094 | * as the bucketizing priority instead of base_pri. All such | |
2095 | * changes are typically due to kernel locking primitive boosts | |
2096 | * or demotions. | |
2097 | */ | |
2098 | static boolean_t | |
2099 | sched_thread_sched_pri_promoted(thread_t thread) | |
2100 | { | |
2101 | return (thread->sched_flags & TH_SFLAG_PROMOTED) || | |
2102 | (thread->sched_flags & TH_SFLAG_PROMOTE_REASON_MASK) || | |
2103 | (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) || | |
2104 | (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || | |
2105 | (thread->kern_promotion_schedpri != 0); | |
2106 | } | |
2107 | ||
2108 | /* | |
2109 | * Routine to update the scheduling bucket for the thread. | |
2110 | * | |
2111 | * In the clutch scheduler implementation, the thread's bucket | |
2112 | * is based on sched_pri if it was promoted due to a kernel | |
2113 | * primitive; otherwise it is based on the thread's base_pri. This | |
2114 | * enhancement allows promoted threads to reach a higher priority | |
2115 | * bucket and potentially get selected sooner for scheduling. | |
2116 | * | |
2117 | * Also, the clutch scheduler does not honor fixed priority below | |
2118 | * FG priority. It simply puts those threads in the corresponding | |
2119 | * timeshare bucket. The reason for doing that is that it is | |
2120 | * extremely hard to define the scheduling properties of such threads | |
2121 | * and they typically lead to performance issues. | |
2122 | */ | |
2123 | ||
2124 | void | |
2125 | sched_clutch_update_thread_bucket(thread_t thread) | |
2126 | { | |
2127 | sched_bucket_t old_bucket = thread->th_sched_bucket; | |
2128 | sched_bucket_t new_bucket = TH_BUCKET_RUN; | |
2129 | assert(thread->runq == PROCESSOR_NULL); | |
2130 | ||
2131 | int pri = (sched_thread_sched_pri_promoted(thread)) ? thread->sched_pri : thread->base_pri; | |
2132 | ||
2133 | switch (thread->sched_mode) { | |
2134 | case TH_MODE_FIXED: | |
2135 | if (pri >= BASEPRI_FOREGROUND) { | |
2136 | new_bucket = TH_BUCKET_FIXPRI; | |
2137 | } else { | |
2138 | new_bucket = sched_convert_pri_to_bucket(pri); | |
2139 | } | |
2140 | break; | |
2141 | ||
2142 | case TH_MODE_REALTIME: | |
2143 | new_bucket = TH_BUCKET_FIXPRI; | |
2144 | break; | |
2145 | ||
2146 | case TH_MODE_TIMESHARE: | |
2147 | new_bucket = sched_convert_pri_to_bucket(pri); | |
2148 | break; | |
2149 | ||
2150 | default: | |
2151 | panic("unexpected mode: %d", thread->sched_mode); | |
2152 | break; | |
2153 | } | |
2154 | ||
2155 | if (old_bucket == new_bucket) { | |
2156 | return; | |
2157 | } | |
2158 | ||
2159 | thread->th_sched_bucket = new_bucket; | |
2160 | thread->pri_shift = sched_clutch_thread_pri_shift(thread, new_bucket); | |
2161 | ||
2162 | /* | |
2163 | * Since this is called after the thread has been removed from the runq, | |
2164 | * only the run counts need to be updated. The re-insert into the runq | |
2165 | * would put the thread into the correct new bucket's runq. | |
2166 | */ | |
2167 | if ((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN) { | |
2168 | sched_clutch_thread_run_bucket_decr(thread, old_bucket); | |
2169 | sched_clutch_thread_run_bucket_incr(thread, new_bucket); | |
2170 | } | |
2171 | } | |
2172 | ||
2173 | ||
2174 | #endif /* CONFIG_SCHED_CLUTCH */ |