1 | /* |
2 | * Copyright (c) 2018 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <mach/mach_types.h> | |
30 | #include <mach/machine.h> | |
31 | #include <machine/machine_routines.h> | |
32 | #include <machine/sched_param.h> | |
33 | #include <machine/machine_cpu.h> | |
34 | #include <kern/kern_types.h> | |
35 | #include <kern/debug.h> | |
36 | #include <kern/machine.h> | |
37 | #include <kern/misc_protos.h> | |
38 | #include <kern/processor.h> | |
39 | #include <kern/queue.h> | |
40 | #include <kern/sched.h> | |
41 | #include <kern/sched_prim.h> | |
42 | #include <kern/task.h> | |
43 | #include <kern/thread.h> | |
44 | #include <kern/sched_clutch.h> | |
45 | #include <machine/atomic.h> | |
47 | #include <sys/kdebug.h> | |
48 | ||
49 | #if __AMP__ |
50 | #include <kern/sched_amp_common.h> | |
51 | #endif /* __AMP__ */ | |
52 | |
53 | #if CONFIG_SCHED_CLUTCH | |
54 | ||
55 | /* Forward declarations of static routines */ | |
56 | ||
57 | /* Root level hierarchy management */ | |
58 | static void sched_clutch_root_init(sched_clutch_root_t, processor_set_t); | |
59 | static void sched_clutch_root_bucket_init(sched_clutch_root_bucket_t, sched_bucket_t); | |
60 | static void sched_clutch_root_pri_update(sched_clutch_root_t); | |
61 | static sched_clutch_root_bucket_t sched_clutch_root_highest_root_bucket(sched_clutch_root_t, uint64_t); | |
62 | static void sched_clutch_root_urgency_inc(sched_clutch_root_t, thread_t); | |
63 | static void sched_clutch_root_urgency_dec(sched_clutch_root_t, thread_t); | |
64 | ||
65 | /* Root bucket level hierarchy management */ | |
66 | static uint64_t sched_clutch_root_bucket_deadline_calculate(sched_clutch_root_bucket_t, uint64_t); | |
67 | static void sched_clutch_root_bucket_deadline_update(sched_clutch_root_bucket_t, sched_clutch_root_t, uint64_t); | |
68 | static int sched_clutch_root_bucket_pri_compare(sched_clutch_root_bucket_t, sched_clutch_root_bucket_t); | |
69 | ||
70 | /* Options for clutch bucket ordering in the runq */ |
71 | __options_decl(sched_clutch_bucket_options_t, uint32_t, { | |
72 | SCHED_CLUTCH_BUCKET_OPTIONS_NONE = 0x0, | |
73 | /* Round robin clutch bucket on thread removal */ | |
74 | SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR = 0x1, | |
75 | /* Insert clutch bucket at head (for thread preemption) */ | |
76 | SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ = 0x2, | |
77 | /* Insert clutch bucket at tail (default) */ | |
78 | SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ = 0x4, | |
79 | }); | |
80 | ||
81 | /* Clutch bucket level hierarchy management */ |
82 | static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t); |
83 | static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t); | |
84 | static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t); | |
85 | static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t); | |
86 | static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t); | |
87 | |
88 | static void sched_clutch_bucket_cpu_usage_update(sched_clutch_bucket_t, uint64_t); | |
89 | static void sched_clutch_bucket_cpu_blocked_update(sched_clutch_bucket_t, uint64_t); | |
90 | static uint8_t sched_clutch_bucket_pri_calculate(sched_clutch_bucket_t, uint64_t); | |
91 | static sched_clutch_bucket_t sched_clutch_root_bucket_highest_clutch_bucket(sched_clutch_root_bucket_t); | |
92 | ||
93 | /* Clutch timeshare properties updates */ | |
94 | static uint32_t sched_clutch_run_bucket_incr(sched_clutch_t, sched_bucket_t); | |
95 | static uint32_t sched_clutch_run_bucket_decr(sched_clutch_t, sched_bucket_t); | |
96 | static void sched_clutch_bucket_cpu_adjust(sched_clutch_bucket_t); | |
97 | static void sched_clutch_bucket_timeshare_update(sched_clutch_bucket_t); | |
98 | static boolean_t sched_thread_sched_pri_promoted(thread_t); | |
99 | /* Clutch membership management */ | |
100 | static boolean_t sched_clutch_thread_insert(sched_clutch_root_t, thread_t, integer_t); | |
101 | static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t, sched_clutch_bucket_options_t);
102 | static thread_t sched_clutch_thread_highest(sched_clutch_root_t); |
103 | ||
104 | /* Clutch properties updates */ | |
105 | static uint32_t sched_clutch_root_urgency(sched_clutch_root_t); | |
106 | static uint32_t sched_clutch_root_count_sum(sched_clutch_root_t); | |
107 | static int sched_clutch_root_priority(sched_clutch_root_t); | |
108 | ||
109 | #if __AMP__ |
110 | /* System based routines */ | |
111 | static bool sched_clutch_pset_available(processor_set_t); | |
112 | #endif /* __AMP__ */ | |
113 | |
114 | /* Helper debugging routines */ | |
115 | static inline void sched_clutch_hierarchy_locked_assert(sched_clutch_root_t); | |
116 | ||
117 | ||
118 | ||
119 | /* | |
120 | * Global priority queue comparator routine for root buckets. The | |
121 | * routine implements the priority queue as a minimum deadline queue | |
122 | * to achieve EDF scheduling. | |
123 | */ | |
124 | priority_queue_compare_fn_t sched_clutch_root_bucket_compare; | |
125 | ||
126 | ||
127 | /* | |
128 | * Special markers for buckets that have invalid WCELs/quantums etc. | |
129 | */ | |
130 | #define SCHED_CLUTCH_INVALID_TIME_32 ((uint32_t)~0) | |
131 | #define SCHED_CLUTCH_INVALID_TIME_64 ((uint64_t)~0) | |
132 | ||
133 | /* | |
134 | * Root level bucket WCELs | |
135 | * | |
136 | * The root level bucket selection algorithm is an Earliest Deadline | |
137 | * First (EDF) algorithm where the deadline for a bucket is defined | |
138 | * by the worst-case-execution-latency and the make-runnable timestamp | |
139 | * for the bucket. | |
140 | * | |
141 | */ | |
142 | static uint32_t sched_clutch_root_bucket_wcel_us[TH_BUCKET_SCHED_MAX] = { | |
143 | SCHED_CLUTCH_INVALID_TIME_32, /* FIXPRI */ | |
144 | 0, /* FG */ | |
145 | 37500, /* IN (37.5ms) */ | |
146 | 75000, /* DF (75ms) */ | |
147 | 150000, /* UT (150ms) */ | |
148 | 250000 /* BG (250ms) */ | |
149 | }; | |
150 | static uint64_t sched_clutch_root_bucket_wcel[TH_BUCKET_SCHED_MAX] = {0}; | |
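/*
 * Worked illustration of the WCEL-based deadlines (hypothetical timestamps,
 * shown in usec for readability; the code below operates on the abstime
 * values stored in sched_clutch_root_bucket_wcel[]):
 *
 *   UT bucket runnable at t = 1,000,000us -> deadline = t + 150,000us = 1,150,000us
 *   BG bucket runnable at t = 1,000,000us -> deadline = t + 250,000us = 1,250,000us
 *
 * Under EDF the UT bucket is serviced first even though both buckets became
 * runnable at the same instant; the FG bucket (WCEL 0) would be serviced
 * before either of them.
 */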
151 | ||
152 | /* | |
153 | * Root level bucket warp | |
154 | * | |
155 | * Each root level bucket has a warp value associated with it as well. | |
156 | * The warp value allows the root bucket to effectively warp ahead of | |
157 | * lower priority buckets for a limited time even if it has a later | |
158 | * deadline. The warping behavior provides extra (but limited) | |
159 | * opportunity for high priority buckets to remain responsive. | |
160 | */ | |
161 | ||
162 | /* Special warp deadline value to indicate that the bucket has not used any warp yet */ | |
163 | #define SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED (SCHED_CLUTCH_INVALID_TIME_64) | |
164 | ||
165 | /* Warp window durations for various tiers */ | |
166 | static uint32_t sched_clutch_root_bucket_warp_us[TH_BUCKET_SCHED_MAX] = { | |
167 | SCHED_CLUTCH_INVALID_TIME_32, /* FIXPRI */ | |
168 | 8000, /* FG (8ms) */ | |
169 | 4000, /* IN (4ms) */ | |
170 | 2000, /* DF (2ms) */ | |
171 | 1000, /* UT (1ms) */ | |
172 | 0 /* BG (0ms) */ | |
173 | }; | |
174 | static uint64_t sched_clutch_root_bucket_warp[TH_BUCKET_SCHED_MAX] = {0}; | |
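/*
 * Worked illustration of warp (hypothetical state): assume a runnable UT
 * bucket currently has the earliest deadline but the FG bucket is also
 * runnable with warp available. FG is allowed to "warp" ahead of UT; the
 * first such selection opens an 8ms warp window for FG. Once that window
 * expires, FG loses its warp until it is replenished (when FG is next
 * selected purely on its deadline), at which point UT gets the CPU.
 */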
175 | ||
176 | /* | |
177 | * Thread level quantum | |
178 | * | |
179 | * The algorithm defines quantums for threads at various buckets. This | |
180 | * (combined with the root level bucket quantums) restricts how much | |
181 | * the lower priority levels can preempt the higher priority threads. | |
182 | */ | |
183 | static uint32_t sched_clutch_thread_quantum_us[TH_BUCKET_SCHED_MAX] = { | |
184 | 10000, /* FIXPRI (10ms) */ | |
185 | 10000, /* FG (10ms) */ | |
186 | 8000, /* IN (8ms) */ | |
187 | 6000, /* DF (6ms) */ | |
188 | 4000, /* UT (4ms) */ | |
189 | 2000 /* BG (2ms) */ | |
190 | }; | |
191 | static uint64_t sched_clutch_thread_quantum[TH_BUCKET_SCHED_MAX] = {0}; | |
192 | ||
193 | enum sched_clutch_state { | |
194 | SCHED_CLUTCH_STATE_EMPTY = 0, | |
195 | SCHED_CLUTCH_STATE_RUNNABLE, | |
196 | }; | |
197 | ||
198 | /* | |
199 | * sched_clutch_us_to_abstime() | |
200 | * | |
201 | * Initializer for converting all durations in usec to abstime | |
202 | */ | |
203 | static void | |
204 | sched_clutch_us_to_abstime(uint32_t *us_vals, uint64_t *abstime_vals) | |
205 | { | |
206 | for (int i = 0; i < TH_BUCKET_SCHED_MAX; i++) { | |
207 | if (us_vals[i] == SCHED_CLUTCH_INVALID_TIME_32) { | |
208 | abstime_vals[i] = SCHED_CLUTCH_INVALID_TIME_64; | |
209 | } else { | |
210 | clock_interval_to_absolutetime_interval(us_vals[i], | |
211 | NSEC_PER_USEC, &abstime_vals[i]); | |
212 | } | |
213 | } | |
214 | } | |
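/*
 * Usage sketch (illustrative; the scheduler init path is not shown here and
 * may differ): each microsecond table above is expected to be converted once
 * into its abstime counterpart, e.g.
 *
 *   sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel);
 *   sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp);
 *   sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum);
 *
 * Entries equal to SCHED_CLUTCH_INVALID_TIME_32 map to SCHED_CLUTCH_INVALID_TIME_64.
 */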
215 | ||
216 | #if DEVELOPMENT || DEBUG | |
217 | ||
218 | /* | |
219 | * sched_clutch_hierarchy_locked_assert() | |
220 | * | |
221 | * Debugging helper routine. Asserts that the hierarchy is locked. The locking | |
222 | * for the hierarchy depends on where the hierarchy is hooked. The current | |
223 | * implementation hooks the hierarchy at the pset, so the hierarchy is locked | |
224 | * using the pset lock. | |
225 | */ | |
226 | static inline void | |
227 | sched_clutch_hierarchy_locked_assert( | |
228 | sched_clutch_root_t root_clutch) | |
229 | { | |
230 | pset_assert_locked(root_clutch->scr_pset); | |
231 | } | |
232 | ||
233 | #else /* DEVELOPMENT || DEBUG */ | |
234 | ||
235 | static inline void | |
236 | sched_clutch_hierarchy_locked_assert( | |
237 | __unused sched_clutch_root_t root_clutch) | |
238 | { | |
239 | } | |
240 | ||
241 | #endif /* DEVELOPMENT || DEBUG */ | |
242 | ||
243 | /* | |
244 | * sched_clutch_thr_count_inc() | |
245 | * | |
246 | * Increment thread count at a hierarchy level with overflow checks. | |
247 | */ | |
248 | static void | |
249 | sched_clutch_thr_count_inc( | |
250 | uint16_t *thr_count) | |
251 | { | |
252 | if (__improbable(os_inc_overflow(thr_count))) { | |
253 | panic("sched_clutch thread count overflowed!"); | |
254 | } | |
255 | } | |
256 | ||
257 | /* | |
258 | * sched_clutch_thr_count_dec() | |
259 | * | |
260 | * Decrement thread count at a hierarchy level with underflow checks. | |
261 | */ | |
262 | static void | |
263 | sched_clutch_thr_count_dec( | |
264 | uint16_t *thr_count) | |
265 | { | |
266 | if (__improbable(os_dec_overflow(thr_count))) { | |
267 | panic("sched_clutch thread count underflowed!"); | |
268 | } | |
269 | } | |
270 | ||
271 | #if __AMP__ |
272 | ||
273 | /* | |
274 | * sched_clutch_pset_available() | |
275 | * | |
276 | * Routine to determine if a pset is available for scheduling. | |
277 | */ | |
278 | static bool | |
279 | sched_clutch_pset_available(processor_set_t pset) | |
280 | { | |
281 | /* Check if cluster has none of the CPUs available */ | |
282 | if (pset->online_processor_count == 0) { | |
283 | return false; | |
284 | } | |
285 | ||
286 | /* Check if the cluster is not recommended by CLPC */ | |
287 | if (!pset_is_recommended(pset)) { | |
288 | return false; | |
289 | } | |
290 | ||
291 | return true; | |
292 | } | |
293 | ||
294 | #endif /* __AMP__ */ | |
295 | |
296 | /* | |
297 | * sched_clutch_root_init() | |
298 | * | |
299 | * Routine to initialize the scheduler hierarchy root. | |
300 | */ | |
301 | static void | |
302 | sched_clutch_root_init( | |
303 | sched_clutch_root_t root_clutch, | |
304 | processor_set_t pset) | |
305 | { | |
306 | root_clutch->scr_thr_count = 0; | |
307 | root_clutch->scr_priority = NOPRI; | |
308 | root_clutch->scr_urgency = 0; | |
309 | root_clutch->scr_pset = pset; | |
310 | ||
311 | /* Initialize the queue which maintains all runnable clutch_buckets for timesharing purposes */ | |
312 | queue_init(&root_clutch->scr_clutch_buckets); | |
313 | ||
314 | /* Initialize the queue which maintains all runnable foreign clutch buckets */ | |
315 | queue_init(&root_clutch->scr_foreign_buckets); | |
316 | ||
317 | /* Initialize the bitmap and priority queue of runnable root buckets */ | |
318 | sched_clutch_root_bucket_compare = priority_heap_make_comparator(a, b, struct sched_clutch_root_bucket, scrb_pqlink, { | |
319 | return (a->scrb_deadline < b->scrb_deadline) ? 1 : ((a->scrb_deadline == b->scrb_deadline) ? 0 : -1); | |
320 | }); | |
321 | priority_queue_init(&root_clutch->scr_root_buckets, PRIORITY_QUEUE_GENERIC_KEY | PRIORITY_QUEUE_MIN_HEAP); | |
322 | bitmap_zero(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX); | |
323 | bitmap_zero(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX); | |
324 | ||
325 | /* Initialize all the root buckets */ | |
326 | for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) { | |
327 | sched_clutch_root_bucket_init(&root_clutch->scr_buckets[i], i); | |
328 | } | |
329 | } | |
330 | ||
331 | /* |
332 | * Clutch Bucket Runqueues | |
333 | * | |
334 | * The clutch buckets are maintained in a runq at the root bucket level. The | |
335 | * runq organization allows clutch buckets to be ordered based on various | |
336 | * factors such as: | |
337 | * | |
338 | * - Clutch buckets are round-robined at the same priority level when a | |
339 | * thread is selected from a clutch bucket. This prevents a clutch bucket | |
340 | * from starving out other clutch buckets at the same priority. | |
341 | * | |
342 | * - Clutch buckets are inserted at the head when they become runnable due to | |
343 | * thread preemption. This allows threads that were preempted to maintain | |
344 | * their order in the queue. | |
345 | * | |
346 | */ | |
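/*
 * Layout sketch of a clutch bucket runq (fields as used by the routines below):
 *
 *   scbrq_bitmap    - one bit per priority level with a non-empty queue
 *   scbrq_queues[]  - per-priority circular queues of clutch buckets
 *   scbrq_highq     - cached highest runnable priority, giving O(1) peek
 *
 * Example (hypothetical): clutch buckets at priorities 12, 12 and 8 set bits
 * 12 and 8, scbrq_highq is 12 and peek returns the first bucket on
 * scbrq_queues[12]; a SAMEPRI_RR rotation then moves that bucket behind its
 * equal priority peer so the two round robin.
 */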
347 | ||
348 | /* | |
349 | * sched_clutch_bucket_runq_init() | |
350 | * | |
351 | * Initialize a clutch bucket runq. | |
352 | */ | |
353 | static void | |
354 | sched_clutch_bucket_runq_init( | |
355 | sched_clutch_bucket_runq_t clutch_buckets_rq) | |
356 | { | |
357 | clutch_buckets_rq->scbrq_highq = NOPRI; | |
358 | for (uint8_t i = 0; i < BITMAP_LEN(NRQS); i++) { | |
359 | clutch_buckets_rq->scbrq_bitmap[i] = 0; | |
360 | } | |
361 | clutch_buckets_rq->scbrq_count = 0; | |
362 | for (int i = 0; i < NRQS; i++) { | |
363 | circle_queue_init(&clutch_buckets_rq->scbrq_queues[i]); | |
364 | } | |
365 | } | |
366 | ||
367 | /* | |
368 | * sched_clutch_bucket_runq_empty() | |
369 | * | |
370 | * Returns whether a clutch bucket runq is empty. | |
371 | */ | |
372 | static boolean_t | |
373 | sched_clutch_bucket_runq_empty( | |
374 | sched_clutch_bucket_runq_t clutch_buckets_rq) | |
375 | { | |
376 | return clutch_buckets_rq->scbrq_count == 0; | |
377 | } | |
378 | ||
379 | /* | |
380 | * sched_clutch_bucket_runq_peek() | |
381 | * | |
382 | * Returns the highest priority clutch bucket in the runq. | |
383 | */ | |
384 | static sched_clutch_bucket_t | |
385 | sched_clutch_bucket_runq_peek( | |
386 | sched_clutch_bucket_runq_t clutch_buckets_rq) | |
387 | { | |
388 | if (clutch_buckets_rq->scbrq_count > 0) { | |
389 | circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_buckets_rq->scbrq_highq]; | |
390 | return cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink); | |
391 | } else { | |
392 | return NULL; | |
393 | } | |
394 | } | |
395 | ||
396 | /* | |
397 | * sched_clutch_bucket_runq_enqueue() | |
398 | * | |
399 | * Enqueue a clutch bucket into the runq based on the options passed in. | |
400 | */ | |
401 | static void | |
402 | sched_clutch_bucket_runq_enqueue( | |
403 | sched_clutch_bucket_runq_t clutch_buckets_rq, | |
404 | sched_clutch_bucket_t clutch_bucket, | |
405 | sched_clutch_bucket_options_t options) | |
406 | { | |
407 | circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority]; | |
408 | if (circle_queue_empty(queue)) { | |
409 | circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink); | |
410 | bitmap_set(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority); | |
411 | if (clutch_bucket->scb_priority > clutch_buckets_rq->scbrq_highq) { | |
412 | clutch_buckets_rq->scbrq_highq = clutch_bucket->scb_priority; | |
413 | } | |
414 | } else { | |
415 | if (options & SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ) { | |
416 | circle_enqueue_head(queue, &clutch_bucket->scb_runqlink); | |
417 | } else { | |
418 | /* | |
419 | * Default behavior (handles SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ & | |
420 | * SCHED_CLUTCH_BUCKET_OPTIONS_NONE) | |
421 | */ | |
422 | circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink); | |
423 | } | |
424 | } | |
425 | clutch_buckets_rq->scbrq_count++; | |
426 | } | |
427 | ||
428 | /* | |
429 | * sched_clutch_bucket_runq_remove() | |
430 | * | |
431 | * Remove a clutch bucket from the runq. | |
432 | */ | |
433 | static void | |
434 | sched_clutch_bucket_runq_remove( | |
435 | sched_clutch_bucket_runq_t clutch_buckets_rq, | |
436 | sched_clutch_bucket_t clutch_bucket) | |
437 | { | |
438 | circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority]; | |
439 | circle_dequeue(queue, &clutch_bucket->scb_runqlink); | |
440 | assert(clutch_buckets_rq->scbrq_count > 0); | |
441 | clutch_buckets_rq->scbrq_count--; | |
442 | if (circle_queue_empty(queue)) { | |
443 | bitmap_clear(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority); | |
444 | clutch_buckets_rq->scbrq_highq = bitmap_first(clutch_buckets_rq->scbrq_bitmap, NRQS); | |
445 | } | |
446 | } | |
447 | ||
448 | static void | |
449 | sched_clutch_bucket_runq_rotate( | |
450 | sched_clutch_bucket_runq_t clutch_buckets_rq, | |
451 | sched_clutch_bucket_t clutch_bucket) | |
452 | { | |
453 | circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority]; | |
454 | assert(clutch_bucket == cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink)); | |
455 | circle_queue_rotate_head_forward(queue); | |
456 | } | |
457 | ||
458 | /* |
459 | * sched_clutch_root_bucket_init() | |
460 | * | |
461 | * Routine to initialize root buckets. | |
462 | */ | |
463 | static void | |
464 | sched_clutch_root_bucket_init( | |
465 | sched_clutch_root_bucket_t root_bucket, | |
466 | sched_bucket_t bucket) | |
467 | { | |
468 | root_bucket->scrb_bucket = bucket; | |
469 | sched_clutch_bucket_runq_init(&root_bucket->scrb_clutch_buckets);
470 | priority_queue_entry_init(&root_bucket->scrb_pqlink); |
471 | root_bucket->scrb_deadline = SCHED_CLUTCH_INVALID_TIME_64; | |
472 | root_bucket->scrb_warped_deadline = 0; | |
473 | root_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[root_bucket->scrb_bucket]; | |
474 | } | |
475 | ||
476 | /* | |
477 | * sched_clutch_root_bucket_pri_compare() | |
478 | * | |
479 | * Routine to compare root buckets based on the highest runnable clutch | |
480 | * bucket priorities in the root buckets. | |
481 | */ | |
482 | static int | |
483 | sched_clutch_root_bucket_pri_compare( | |
484 | sched_clutch_root_bucket_t a, | |
485 | sched_clutch_root_bucket_t b) | |
486 | { | |
487 | sched_clutch_bucket_t a_highest = sched_clutch_root_bucket_highest_clutch_bucket(a); | |
488 | sched_clutch_bucket_t b_highest = sched_clutch_root_bucket_highest_clutch_bucket(b); | |
489 | return (a_highest->scb_priority > b_highest->scb_priority) ? | |
490 | 1 : ((a_highest->scb_priority == b_highest->scb_priority) ? 0 : -1); | |
491 | } | |
492 | ||
493 | /* | |
494 | * sched_clutch_root_select_aboveui() | |
495 | * | |
496 | * Special case scheduling for Above UI bucket. | |
497 | * | |
498 | * AboveUI threads are typically system critical threads that need low latency | |
499 | * which is why they are handled specially. | |
500 | * | |
501 | * Since the priority ranges for the AboveUI and FG Timeshare buckets overlap, it is | |
502 | * important to maintain some native priority order between those buckets. The policy | |
503 | * implemented here is to compare the highest clutch buckets of both buckets; if the | |
504 | * Above UI bucket is higher, schedule it immediately. Otherwise fall through to the | |
505 | * deadline-based scheduling, which should pick up the timeshare buckets. | |
506 | * | |
507 | * The implementation allows extremely low latency CPU access for Above UI threads | |
508 | * while supporting the use case of high priority timeshare threads contending with | |
509 | * lower priority fixed priority threads. | |
510 | */ | |
511 | static boolean_t | |
512 | sched_clutch_root_select_aboveui( | |
513 | sched_clutch_root_t root_clutch) | |
514 | { | |
515 | if (bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_FIXPRI)) { | |
516 | sched_clutch_root_bucket_t root_bucket_aboveui = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI]; | |
517 | sched_clutch_root_bucket_t root_bucket_sharefg = &root_clutch->scr_buckets[TH_BUCKET_SHARE_FG]; | |
518 | ||
519 | if (!bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_SHARE_FG)) { | |
520 | /* If the timeshare FG bucket is not runnable, pick the aboveUI bucket for scheduling */ | |
521 | return true; | |
522 | } | |
523 | if (sched_clutch_root_bucket_pri_compare(root_bucket_aboveui, root_bucket_sharefg) >= 0) { | |
524 | /* If the aboveUI bucket has a higher native clutch bucket priority, schedule it */ | |
525 | return true; | |
526 | } | |
527 | } | |
528 | return false; | |
529 | } | |
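/*
 * Illustration (hypothetical priorities): if the AboveUI root bucket's highest
 * clutch bucket priority is 60 and the timeshare FG root bucket's highest is
 * 55, the AboveUI bucket is selected immediately. If the FG value were 62
 * instead, this routine would return false and the EDF/warp logic below would
 * decide which root bucket runs.
 */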
530 | ||
531 | ||
532 | /* | |
533 | * sched_clutch_root_highest_root_bucket() | |
534 | * | |
535 | * Main routine to find the highest runnable root level bucket. | |
536 | * This routine is called from performance-sensitive contexts, so it is | |
537 | * crucial to keep it O(1). | |
538 | * | |
539 | */ | |
540 | static sched_clutch_root_bucket_t | |
541 | sched_clutch_root_highest_root_bucket( | |
542 | sched_clutch_root_t root_clutch, | |
543 | uint64_t timestamp) | |
544 | { | |
545 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
546 | if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) { | |
547 | return NULL; | |
548 | } | |
549 | ||
550 | if (sched_clutch_root_select_aboveui(root_clutch)) { | |
551 | return &root_clutch->scr_buckets[TH_BUCKET_FIXPRI]; | |
552 | } | |
553 | ||
554 | /* | |
555 | * Above UI bucket is not runnable or has a low priority clutch bucket; use the earliest deadline model | |
556 | * to schedule threads. The idea is that as the timeshare buckets use CPU, they will drop their | |
557 | * interactivity score and allow low priority AboveUI clutch buckets to be scheduled. | |
558 | */ | |
559 | ||
560 | /* Find the earliest deadline bucket */ | |
561 | sched_clutch_root_bucket_t edf_bucket = priority_queue_min(&root_clutch->scr_root_buckets, struct sched_clutch_root_bucket, scrb_pqlink); | |
562 | ||
563 | sched_clutch_root_bucket_t warp_bucket = NULL; | |
564 | int warp_bucket_index = -1; | |
565 | evaluate_warp_buckets: | |
566 | /* Check if any higher runnable buckets have warp available */ | |
567 | warp_bucket_index = bitmap_lsb_first(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX); | |
568 | ||
569 | if ((warp_bucket_index == -1) || (warp_bucket_index >= edf_bucket->scrb_bucket)) { | |
570 | /* No higher buckets have warp available; choose the edf bucket and replenish its warp */ | |
571 | sched_clutch_root_bucket_deadline_update(edf_bucket, root_clutch, timestamp); | |
572 | edf_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[edf_bucket->scrb_bucket]; | |
573 | edf_bucket->scrb_warped_deadline = SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED; | |
574 | bitmap_set(root_clutch->scr_warp_available, edf_bucket->scrb_bucket); | |
575 | return edf_bucket; | |
576 | } | |
577 | ||
578 | /* | |
579 | * Looks like there is a root bucket which is higher in the natural priority | |
580 | * order than edf_bucket and might have some warp remaining. | |
581 | */ | |
582 | warp_bucket = &root_clutch->scr_buckets[warp_bucket_index]; | |
583 | if (warp_bucket->scrb_warped_deadline == SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) { | |
584 | /* Root bucket has not used any of its warp; set a deadline to expire its warp and return it */ | |
585 | warp_bucket->scrb_warped_deadline = timestamp + warp_bucket->scrb_warp_remaining; | |
586 | sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp); | |
587 | return warp_bucket; | |
588 | } | |
589 | if (warp_bucket->scrb_warped_deadline > timestamp) { | |
590 | /* Root bucket already has a warp window open with some warp remaining */ | |
591 | sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp); | |
592 | return warp_bucket; | |
593 | } | |
594 | ||
595 | /* For this bucket, the warp window was opened sometime in the past but has now | |
596 | * expired. Mark the bucket as not available for warp anymore and re-run the | |
597 | * warp bucket selection logic. | |
598 | */ | |
599 | warp_bucket->scrb_warp_remaining = 0; | |
600 | bitmap_clear(root_clutch->scr_warp_available, warp_bucket->scrb_bucket); | |
601 | goto evaluate_warp_buckets; | |
602 | } | |
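/*
 * Walkthrough of the selection above (hypothetical state): suppose edf_bucket
 * is UT and the warp bitmap has FG set. Since FG is higher than UT in natural
 * priority order, FG is chosen instead of UT:
 *   - the first selection with warp unused opens a warp window that expires at
 *     timestamp + scrb_warp_remaining (8ms for FG);
 *   - while that window is open, FG continues to be chosen here;
 *   - once the window expires, FG's warp bit is cleared and the selection is
 *     re-evaluated, eventually falling back to UT, whose own warp and deadline
 *     are then replenished.
 */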
603 | ||
604 | /* | |
605 | * sched_clutch_root_bucket_deadline_calculate() | |
606 | * | |
607 | * Calculate the deadline for the bucket based on its WCEL | |
608 | */ | |
609 | static uint64_t | |
610 | sched_clutch_root_bucket_deadline_calculate( | |
611 | sched_clutch_root_bucket_t root_bucket, | |
612 | uint64_t timestamp) | |
613 | { | |
614 | /* For the fixpri AboveUI bucket, always return it as the earliest deadline */ | |
615 | if (root_bucket->scrb_bucket < TH_BUCKET_SHARE_FG) { | |
616 | return 0; | |
617 | } | |
618 | ||
619 | /* For all timeshare buckets set the deadline as current time + worst-case-execution-latency */ | |
620 | return timestamp + sched_clutch_root_bucket_wcel[root_bucket->scrb_bucket]; | |
621 | } | |
622 | ||
623 | /* | |
624 | * sched_clutch_root_bucket_deadline_update() | |
625 | * | |
626 | * Routine to update the deadline of the root bucket when it is selected. | |
627 | * Updating the deadline also moves the root_bucket in the EDF priority | |
628 | * queue. | |
629 | */ | |
630 | static void | |
631 | sched_clutch_root_bucket_deadline_update( | |
632 | sched_clutch_root_bucket_t root_bucket, | |
633 | sched_clutch_root_t root_clutch, | |
634 | uint64_t timestamp) | |
635 | { | |
636 | if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) { | |
637 | /* The algorithm never uses the deadlines for scheduling TH_BUCKET_FIXPRI bucket */ | |
638 | return; | |
639 | } | |
640 | uint64_t old_deadline = root_bucket->scrb_deadline; | |
641 | uint64_t new_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp); | |
642 | assert(old_deadline <= new_deadline); | |
643 | if (old_deadline != new_deadline) { | |
644 | root_bucket->scrb_deadline = new_deadline; | |
645 | /* Since the priority queue is a min-heap, use the decrease routine even though the deadline has a larger value now */ | |
646 | priority_queue_entry_decrease(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare); | |
647 | } | |
648 | } | |
649 | ||
650 | /* | |
651 | * sched_clutch_root_bucket_runnable() | |
652 | * | |
653 | * Routine to insert a newly runnable root bucket into the hierarchy. | |
654 | * Also updates the deadline and warp parameters as necessary. | |
655 | */ | |
656 | static void | |
657 | sched_clutch_root_bucket_runnable( | |
658 | sched_clutch_root_bucket_t root_bucket, | |
659 | sched_clutch_root_t root_clutch, | |
660 | uint64_t timestamp) | |
661 | { | |
662 | /* Mark the root bucket as runnable */ | |
663 | bitmap_set(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket); | |
664 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE, | |
665 | root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, 0, 0, 0); | |
666 | ||
667 | if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) { | |
668 | /* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */ | |
669 | return; | |
670 | } | |
671 | ||
672 | root_bucket->scrb_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp); | |
673 | priority_queue_insert(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare); | |
674 | ||
675 | if (root_bucket->scrb_warp_remaining) { | |
676 | /* Since the bucket has some warp remaining and it is now runnable, mark it as available for warp */ | |
677 | bitmap_set(root_clutch->scr_warp_available, root_bucket->scrb_bucket); | |
678 | } | |
679 | } | |
680 | ||
681 | /* | |
682 | * sched_clutch_root_bucket_empty() | |
683 | * | |
684 | * Routine to remove an empty root bucket from the hierarchy. | |
685 | * Also updates the deadline and warp parameters as necessary. | |
686 | */ | |
687 | static void | |
688 | sched_clutch_root_bucket_empty( | |
689 | sched_clutch_root_bucket_t root_bucket, | |
690 | sched_clutch_root_t root_clutch, | |
691 | uint64_t timestamp) | |
692 | { | |
693 | bitmap_clear(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket); | |
694 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE, | |
695 | root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0, 0); | |
696 | ||
697 | if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) { | |
698 | /* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */ | |
699 | return; | |
700 | } | |
701 | ||
702 | priority_queue_remove(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, sched_clutch_root_bucket_compare); | |
703 | ||
704 | bitmap_clear(root_clutch->scr_warp_available, root_bucket->scrb_bucket); | |
705 | if (root_bucket->scrb_warped_deadline > timestamp) { | |
706 | /* | |
707 | * For root buckets that were using the warp, check if the warp | |
708 | * deadline is in the future. If yes, remove the wall time the | |
709 | * warp was active and update the warp remaining. This allows | |
710 | * the root bucket to use the remaining warp the next time it | |
711 | * becomes runnable. | |
712 | */ | |
713 | root_bucket->scrb_warp_remaining = root_bucket->scrb_warped_deadline - timestamp; | |
714 | } else if (root_bucket->scrb_warped_deadline != SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) { | |
715 | /* | |
716 | * If the root bucket's warped deadline is in the past, it has used up | |
717 | * all the warp it was assigned. Empty out its warp remaining. | |
718 | */ | |
719 | root_bucket->scrb_warp_remaining = 0; | |
720 | } | |
721 | } | |
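/*
 * Example of the warp accounting above (hypothetical times): if the FG bucket
 * opened an 8ms warp window at time t and its last thread blocks at t + 3ms,
 * the bucket goes empty while scrb_warped_deadline is still 5ms in the
 * future, so scrb_warp_remaining is set to 5ms and can be used the next time
 * the bucket becomes runnable.
 */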
722 | ||
723 | /* | |
724 | * sched_clutch_root_pri_update() | |
725 | * | |
726 | * The root level priority is used for thread selection and preemption | |
727 | * logic. | |
728 | */ | |
729 | static void | |
730 | sched_clutch_root_pri_update( | |
731 | sched_clutch_root_t root_clutch) | |
732 | { | |
733 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
734 | if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) { | |
735 | /* No runnable root buckets */ | |
736 | root_clutch->scr_priority = NOPRI; | |
737 | assert(root_clutch->scr_urgency == 0); | |
738 | return; | |
739 | } | |
740 | sched_clutch_root_bucket_t root_bucket = NULL; | |
741 | /* Special case for AboveUI (uses same logic as thread selection) */ | |
742 | if (sched_clutch_root_select_aboveui(root_clutch)) { | |
743 | root_bucket = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI]; | |
744 | } else { | |
745 | /* | |
746 | * AboveUI bucket is not runnable or has a low clutch bucket priority, so | |
747 | * select the next runnable root bucket in natural priority order. This logic | |
748 | * is slightly different from thread selection, because thread selection | |
749 | * considers deadlines, warps etc. to decide the most optimal bucket at a | |
750 | * given timestamp. Since the priority value is used for preemption decisions | |
751 | * only, it needs to be based on the highest runnable thread available in | |
752 | * the timeshare domain. | |
753 | */ | |
754 | int root_bucket_index = bitmap_lsb_next(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX, TH_BUCKET_FIXPRI); | |
755 | assert(root_bucket_index != -1); | |
756 | root_bucket = &root_clutch->scr_buckets[root_bucket_index]; | |
757 | } | |
758 | /* For the selected root bucket, find the highest priority clutch bucket */ | |
759 | sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket); | |
760 | root_clutch->scr_priority = priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq); | |
761 | } | |
762 | ||
763 | /* | |
764 | * sched_clutch_root_urgency_inc() | |
765 | * | |
766 | * Routine to increment the urgency at the root level based on the thread | |
767 | * priority that is being inserted into the hierarchy. The root urgency | |
768 | * counter is updated based on the urgency of threads in any of the | |
769 | * clutch buckets which are part of the hierarchy. | |
770 | * | |
771 | * Always called with the pset lock held. | |
772 | */ | |
773 | static void | |
774 | sched_clutch_root_urgency_inc( | |
775 | sched_clutch_root_t root_clutch, | |
776 | thread_t thread) | |
777 | { | |
778 | if (SCHED(priority_is_urgent)(thread->sched_pri)) { | |
779 | root_clutch->scr_urgency++; | |
780 | } | |
781 | } | |
782 | ||
783 | /* | |
784 | * sched_clutch_root_urgency_dec() | |
785 | * | |
786 | * Routine to decrement the urgency at the root level based on the thread | |
787 | * priority that is being removed from the hierarchy. The root urgency | |
788 | * counter is updated based on the urgency of threads in any of the | |
789 | * clutch buckets which are part of the hierarchy. | |
790 | * | |
791 | * Always called with the pset lock held. | |
792 | */ | |
793 | static void | |
794 | sched_clutch_root_urgency_dec( | |
795 | sched_clutch_root_t root_clutch, | |
796 | thread_t thread) | |
797 | { | |
798 | if (SCHED(priority_is_urgent)(thread->sched_pri)) { | |
799 | root_clutch->scr_urgency--; | |
800 | } | |
801 | } | |
802 | ||
803 | /* | |
804 | * Clutch bucket level scheduling | |
805 | * | |
806 | * The second level of scheduling is the clutch bucket level scheduling | |
807 | * which tries to schedule thread groups within root_buckets. Each | |
808 | * clutch represents a thread group and a clutch_bucket represents | |
809 | * threads at a particular sched_bucket within that thread group. The | |
810 | * goal of this level of scheduling is to allow interactive thread | |
811 | * groups low latency access to the CPU. It also provides slight | |
812 | * scheduling preference for App and unrestricted thread groups. | |
813 | * | |
814 | * The clutch bucket scheduling algorithm measures an interactivity | |
815 | * score for all clutch buckets. The interactivity score is based | |
816 | * on the ratio of the CPU used and the voluntary blocking of threads | |
817 | * within the clutch bucket. The algorithm is very close to the ULE | |
818 | * scheduler on FreeBSD in terms of calculations. The interactivity | |
819 | * score provides an interactivity boost in the range of | |
820 | * [0:SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI * 2] which allows interactive | |
821 | * thread groups to win over CPU spinners. | |
822 | */ | |
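/*
 * Shape of the score computed by sched_clutch_bucket_interactivity_score_calculate()
 * below, with P = sched_clutch_bucket_interactive_pri:
 *
 *   blocked >  used : score = P + (P * (blocked - used)) / blocked   -> range [P, 2P]
 *   blocked <= used : score = (P * blocked) / used                   -> range [0, P]
 *
 * Thread groups that mostly block therefore land in the upper half of the
 * boost range, while CPU spinners land in the lower half.
 */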
823 | ||
824 | /* Priority boost range for interactivity */ | |
825 | #define SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT (8) | |
826 | uint8_t sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT; | |
827 | ||
828 | /* Window for scaling the CPU usage and blocked values (currently 500ms); it is the threshold for used + blocked time */ | |
829 | uint64_t sched_clutch_bucket_adjust_threshold = 0; | |
830 | #define SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS (500000) | |
831 | ||
832 | /* The ratio to scale the cpu/blocked time per window */ | |
833 | #define SCHED_CLUTCH_BUCKET_ADJUST_RATIO (10) | |
834 | ||
835 | /* Rate at which the interactivity score is recalculated. This keeps the score smooth in the face of extremely bursty behavior */ | |
836 | uint64_t sched_clutch_bucket_interactivity_delta = 0; | |
837 | #define SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT (25000) | |
838 | ||
839 | /* | |
840 | * In order to allow App thread groups some preference over daemon thread | |
841 | * groups, the App clutch_buckets get an 8-point boost. The boost value should | |
842 | * be chosen such that badly behaved apps are still penalized over well | |
843 | * behaved interactive daemon clutch_buckets. | |
844 | */ | |
845 | #define SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT (8) | |
846 | uint8_t sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT; | |
847 | ||
848 | /* Initial value for voluntary blocking time for the clutch_bucket */ | |
849 | #define SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID (uint32_t)(~0) | |
850 | ||
851 | /* | |
852 | * sched_clutch_bucket_init() | |
853 | * | |
854 | * Initializer for clutch buckets. | |
855 | */ | |
856 | static void | |
857 | sched_clutch_bucket_init( | |
858 | sched_clutch_bucket_t clutch_bucket, | |
859 | sched_clutch_t clutch, | |
860 | sched_bucket_t bucket) | |
861 | { | |
862 | bzero(clutch_bucket, sizeof(struct sched_clutch_bucket)); | |
863 | ||
864 | clutch_bucket->scb_bucket = bucket; | |
865 | /* scb_priority will be recalculated when a thread is inserted in the clutch bucket */ | |
866 | clutch_bucket->scb_priority = 0; | |
867 | /* | |
868 | * All thread groups should be initialized to be interactive; this allows the newly launched | |
869 | * thread groups to fairly compete with already running thread groups. | |
870 | */ | |
871 | clutch_bucket->scb_interactivity_score = (sched_clutch_bucket_interactive_pri * 2); | |
872 | clutch_bucket->scb_foreign = false; | |
873 | ||
874 | os_atomic_store(&clutch_bucket->scb_timeshare_tick, 0, relaxed); | |
875 | os_atomic_store(&clutch_bucket->scb_pri_shift, INT8_MAX, relaxed); | |
876 | ||
877 | clutch_bucket->scb_interactivity_ts = 0; | |
878 | clutch_bucket->scb_blocked_ts = SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID; | |
879 | clutch_bucket->scb_clutch = clutch; |
880 | clutch_bucket->scb_root = NULL; | |
881 | priority_queue_init(&clutch_bucket->scb_clutchpri_prioq, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP); | |
882 | run_queue_init(&clutch_bucket->scb_runq); | |
883 | } | |
884 | ||
885 | /* | |
886 | * sched_clutch_init_with_thread_group() | |
887 | * | |
888 | * Initialize the sched_clutch when the thread group is being created | |
889 | */ | |
890 | void | |
891 | sched_clutch_init_with_thread_group( | |
892 | sched_clutch_t clutch, | |
893 | struct thread_group *tg) | |
894 | { | |
895 | os_atomic_store(&clutch->sc_thr_count, 0, relaxed); | |
896 | ||
897 | /* Initialize all the clutch buckets */ | |
898 | for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) { | |
899 | sched_clutch_bucket_init(&(clutch->sc_clutch_buckets[i]), clutch, i); | |
900 | } | |
901 | ||
902 | /* Grouping specific fields */ | |
903 | clutch->sc_tg = tg; | |
904 | os_atomic_store(&clutch->sc_tg_priority, 0, relaxed); | |
905 | } | |
906 | ||
907 | /* | |
908 | * sched_clutch_destroy() | |
909 | * | |
910 | * Destructor for clutch; called from thread group release code. | |
911 | */ | |
912 | void | |
913 | sched_clutch_destroy( | |
914 | __unused sched_clutch_t clutch) | |
915 | { | |
916 | assert(os_atomic_load(&clutch->sc_thr_count, relaxed) == 0); | |
917 | } | |
918 | ||
919 | #if __AMP__ |
920 | ||
921 | /* | |
922 | * sched_clutch_bucket_foreign() | |
923 | * | |
924 | * Identifies if the clutch bucket is foreign to (i.e. not recommended for) this | |
925 | * hierarchy. This is possible when the recommended hierarchy/pset is not | |
926 | * currently available for scheduling. | |
927 | */ | |
928 | static boolean_t | |
929 | sched_clutch_bucket_foreign(sched_clutch_root_t root_clutch, sched_clutch_bucket_t clutch_bucket) | |
930 | { | |
931 | assert(clutch_bucket->scb_thr_count > 0); | |
932 | if (!sched_clutch_pset_available(root_clutch->scr_pset)) { | |
933 | /* Even though the pset was not available for scheduling, threads | |
934 | * are being put in its runq (this might be due to the other pset | |
935 | * being turned off and this being the master processor pset). | |
936 | * Mark the clutch bucket as foreign so that when the other | |
937 | * pset becomes available, it moves the clutch bucket accordingly. | |
938 | */ | |
939 | return true; | |
940 | } | |
941 | thread_t thread = run_queue_peek(&clutch_bucket->scb_runq); | |
942 | pset_cluster_type_t pset_type = recommended_pset_type(thread); | |
943 | return pset_type != root_clutch->scr_pset->pset_cluster_type; | |
944 | } | |
945 | ||
946 | #endif /* __AMP__ */ | |
947 | |
948 | /* | |
949 | * sched_clutch_bucket_hierarchy_insert() | |
950 | * | |
951 | * Routine to insert a newly runnable clutch_bucket into the root hierarchy. | |
952 | */ | |
953 | static void | |
954 | sched_clutch_bucket_hierarchy_insert( | |
955 | sched_clutch_root_t root_clutch, | |
956 | sched_clutch_bucket_t clutch_bucket, | |
957 | sched_bucket_t bucket, | |
958 | uint64_t timestamp, |
959 | sched_clutch_bucket_options_t options) | |
960 | { |
961 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
962 | if (bucket > TH_BUCKET_FIXPRI) { | |
963 | /* Enqueue the timeshare clutch buckets into the global runnable clutch_bucket list; used for sched tick operations */ | |
964 | enqueue_tail(&root_clutch->scr_clutch_buckets, &clutch_bucket->scb_listlink); | |
965 | } | |
966 | #if __AMP__ |
967 | /* Check if the bucket is a foreign clutch bucket and add it to the foreign buckets list */ | |
968 | if (sched_clutch_bucket_foreign(root_clutch, clutch_bucket)) { | |
969 | clutch_bucket->scb_foreign = true; | |
970 | enqueue_tail(&root_clutch->scr_foreign_buckets, &clutch_bucket->scb_foreignlink); | |
971 | } | |
972 | #endif /* __AMP__ */ | |
973 | sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket]; |
974 | ||
975 | /* If this is the first clutch bucket in the root bucket, insert the root bucket into the root priority queue */ | |
976 | if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
977 | sched_clutch_root_bucket_runnable(root_bucket, root_clutch, timestamp); |
978 | } | |
979 | ||
980 | /* Insert the clutch bucket into the root bucket run queue with order based on options */ |
981 | sched_clutch_bucket_runq_enqueue(&root_bucket->scrb_clutch_buckets, clutch_bucket, options); | |
982 | os_atomic_store(&clutch_bucket->scb_root, root_clutch, relaxed); |
983 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE, | |
984 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, clutch_bucket->scb_priority, 0); | |
985 | } | |
986 | ||
987 | /* | |
988 | * sched_clutch_bucket_hierarchy_remove() | |
989 | * | |
990 | * Routine to remove an empty clutch bucket from the root hierarchy. | |
991 | */ | |
992 | static void | |
993 | sched_clutch_bucket_hierarchy_remove( | |
994 | sched_clutch_root_t root_clutch, | |
995 | sched_clutch_bucket_t clutch_bucket, | |
996 | sched_bucket_t bucket, | |
997 | uint64_t timestamp, |
998 | __unused sched_clutch_bucket_options_t options) | |
999 | { |
1000 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1001 | if (bucket > TH_BUCKET_FIXPRI) { | |
1002 | /* Remove the timeshare clutch bucket from the globally runnable clutch_bucket list */ | |
1003 | remqueue(&clutch_bucket->scb_listlink); | |
1004 | } | |
1005 | #if __AMP__ |
1006 | if (clutch_bucket->scb_foreign) { | |
1007 | clutch_bucket->scb_foreign = false; | |
1008 | remqueue(&clutch_bucket->scb_foreignlink); | |
1009 | } | |
1010 | #endif /* __AMP__ */ | |
1011 | |
1012 | sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket]; | |
1013 | ||
1014 | /* Remove the clutch bucket from the root bucket priority queue */ | |
1015 | sched_clutch_bucket_runq_remove(&root_bucket->scrb_clutch_buckets, clutch_bucket);
1016 | os_atomic_store(&clutch_bucket->scb_root, NULL, relaxed); |
1017 | clutch_bucket->scb_blocked_ts = timestamp; | |
1018 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE, | |
1019 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0); | |
1020 | ||
1021 | /* If the root bucket priority queue is now empty, remove it from the root priority queue */ | |
1022 | if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
1023 | sched_clutch_root_bucket_empty(root_bucket, root_clutch, timestamp); |
1024 | } | |
1025 | } | |
1026 | ||
1027 | /* | |
1028 | * sched_clutch_bucket_base_pri() | |
1029 | * | |
1030 | * Calculates the "base" priority of the clutch bucket. The base | |
1031 | * priority of the clutch bucket is the max of the highest | |
1032 | * base_pri and the highest sched_pri in the clutch bucket, plus any | |
1033 | * grouping-specific (App/Daemon...) boost applicable to the | |
1034 | * clutch_bucket. | |
1035 | */ | |
1036 | static uint8_t | |
1037 | sched_clutch_bucket_base_pri( | |
1038 | sched_clutch_bucket_t clutch_bucket) | |
1039 | { | |
1040 | uint8_t clutch_boost = 0; | |
1041 | assert(clutch_bucket->scb_runq.count != 0); | |
1042 | ||
1043 | sched_clutch_t clutch = clutch_bucket->scb_clutch; | |
1044 | ||
1045 | /* | |
1046 | * Since the clutch bucket can contain threads that are members of the group due | |
1047 | * to the sched_pri being promoted or due to their base pri, the base priority of | |
1048 | * the entire clutch bucket should be based on the highest thread (promoted or base) | |
1049 | * in the clutch bucket. | |
1050 | */ | |
1051 | uint8_t max_pri = priority_queue_empty(&clutch_bucket->scb_clutchpri_prioq) ? 0 : priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq); | |
1052 | ||
1053 | /* | |
1054 | * For all AboveUI clutch buckets and clutch buckets for thread groups that | |
1055 | * haven't been specified as SCHED_CLUTCH_TG_PRI_LOW, give a priority boost | |
1056 | */ | |
1057 | if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) || | |
1058 | (os_atomic_load(&clutch->sc_tg_priority, relaxed) != SCHED_CLUTCH_TG_PRI_LOW)) { | |
1059 | clutch_boost = sched_clutch_bucket_pri_boost; | |
1060 | } | |
1061 | return max_pri + clutch_boost; | |
1062 | } | |
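/*
 * Example (hypothetical values): if the highest thread in the clutch bucket
 * has priority 31 and its thread group is not marked SCHED_CLUTCH_TG_PRI_LOW,
 * the base priority is 31 + sched_clutch_bucket_pri_boost (8 by default) = 39.
 */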
1063 | ||
1064 | /* | |
1065 | * sched_clutch_bucket_interactivity_score_calculate() | |
1066 | * | |
1067 | * Routine to calculate the interactivity score for the clutch bucket. The | |
1068 | * interactivity score is based on the ratio of CPU used by all threads in | |
1069 | * the bucket and the blocked time of the bucket as a whole. | |
1070 | */ | |
1071 | static uint8_t | |
1072 | sched_clutch_bucket_interactivity_score_calculate( | |
1073 | sched_clutch_bucket_t clutch_bucket, | |
1074 | uint64_t timestamp) | |
1075 | { | |
1076 | if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) { | |
1077 | /* | |
1078 | * Since the root bucket selection algorithm for Above UI looks at clutch bucket | |
1079 | * priorities, make sure all AboveUI buckets are marked interactive. | |
1080 | */ | |
1081 | assert(clutch_bucket->scb_interactivity_score == (2 * sched_clutch_bucket_interactive_pri)); | |
1082 | return clutch_bucket->scb_interactivity_score; | |
1083 | } | |
1084 | ||
1085 | if (clutch_bucket->scb_interactivity_ts == 0) { | |
1086 | /* | |
1087 | * This indicates a newly initialized clutch bucket; return the default interactivity score | |
1088 | * and update timestamp. | |
1089 | */ | |
1090 | clutch_bucket->scb_interactivity_ts = timestamp; | |
1091 | return clutch_bucket->scb_interactivity_score; | |
1092 | } | |
1093 | ||
1094 | if (timestamp < (clutch_bucket->scb_interactivity_ts + sched_clutch_bucket_interactivity_delta)) { | |
1095 | return clutch_bucket->scb_interactivity_score; | |
1096 | } | |
1097 | ||
1098 | /* Check if the clutch bucket accounting needs to be scaled */ | |
1099 | sched_clutch_bucket_cpu_adjust(clutch_bucket); | |
1100 | clutch_bucket->scb_interactivity_ts = timestamp; | |
1101 | ||
1102 | sched_clutch_bucket_cpu_data_t scb_cpu_data; | |
1103 | scb_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed); | |
1104 | clutch_cpu_data_t cpu_used = scb_cpu_data.cpu_data.scbcd_cpu_used; | |
1105 | clutch_cpu_data_t cpu_blocked = scb_cpu_data.cpu_data.scbcd_cpu_blocked; | |
1106 | ||
1107 | /* | |
1108 | * In extremely CPU-contended cases, it is possible that the clutch bucket has been runnable | |
1109 | * for a long time but none of its threads have been picked up for execution. In that case, both | |
1110 | * the CPU used and blocked would be 0. | |
1111 | */ | |
1112 | if ((cpu_blocked == 0) && (cpu_used == 0)) { | |
1113 | return clutch_bucket->scb_interactivity_score; | |
1114 | } | |
1115 | ||
1116 | /* | |
1117 | * For all timeshare buckets, calculate the interactivity score of the bucket | |
1118 | * and add it to the base priority | |
1119 | */ | |
1120 | uint8_t interactive_score = 0; | |
1121 | if (cpu_blocked > cpu_used) { | |
1122 | /* Interactive clutch_bucket case */ | |
1123 | interactive_score = sched_clutch_bucket_interactive_pri + | |
1124 | ((sched_clutch_bucket_interactive_pri * (cpu_blocked - cpu_used)) / cpu_blocked); | |
1125 | } else { | |
1126 | /* Non-interactive clutch_bucket case */ | |
1127 | interactive_score = ((sched_clutch_bucket_interactive_pri * cpu_blocked) / cpu_used); | |
1128 | } | |
1129 | clutch_bucket->scb_interactivity_score = interactive_score; | |
1130 | return interactive_score; | |
1131 | } | |
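/*
 * Worked example (hypothetical CPU data, default interactive pri of 8,
 * integer arithmetic as in the code above):
 *
 *   blocked = 300, used = 100  ->  8 + (8 * 200) / 300 = 8 + 5 = 13
 *   blocked = 100, used = 400  ->  (8 * 100) / 400 = 2
 */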
1132 | ||
1133 | /* | |
1134 | * sched_clutch_bucket_pri_calculate() | |
1135 | * | |
1136 | * The priority calculation algorithm for the clutch_bucket is a slight | |
1137 | * modification on the ULE interactivity score. It uses the base priority | |
1138 | * of the clutch bucket and applies an interactivity score boost to the | |
1139 | * highly responsive clutch buckets. | |
1140 | */ | |
1141 | ||
1142 | static uint8_t | |
1143 | sched_clutch_bucket_pri_calculate( | |
1144 | sched_clutch_bucket_t clutch_bucket, | |
1145 | uint64_t timestamp) | |
1146 | { | |
1147 | /* For empty clutch buckets, return priority 0 */ | |
1148 | if (clutch_bucket->scb_thr_count == 0) { | |
1149 | return 0; | |
1150 | } | |
1151 | ||
1152 | uint8_t base_pri = sched_clutch_bucket_base_pri(clutch_bucket); | |
1153 | uint8_t interactive_score = sched_clutch_bucket_interactivity_score_calculate(clutch_bucket, timestamp); | |
1154 | ||
1155 | assert(((uint64_t)base_pri + interactive_score) <= UINT8_MAX); | |
1156 | uint8_t pri = base_pri + interactive_score; | |
1157 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_PRI) | DBG_FUNC_NONE, | |
1158 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, pri, interactive_score, 0); | |
1159 | return pri; | |
1160 | } | |
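/*
 * Continuing the examples above (hypothetical values): a base priority of 39
 * and an interactivity score of 13 give a clutch bucket priority of 52.
 */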
1161 | ||
1162 | /* | |
1163 | * sched_clutch_root_bucket_highest_clutch_bucket() | |
1164 | * | |
1165 | * Routine to find the highest priority clutch bucket | |
1166 | * within the root bucket. | |
1167 | */ | |
1168 | static sched_clutch_bucket_t | |
1169 | sched_clutch_root_bucket_highest_clutch_bucket( | |
1170 | sched_clutch_root_bucket_t root_bucket) | |
1171 | { | |
1172 | if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
1173 | return NULL; |
1174 | } | |
1175 | return sched_clutch_bucket_runq_peek(&root_bucket->scrb_clutch_buckets);
1176 | } |
1177 | ||
1178 | /* | |
1179 | * sched_clutch_bucket_runnable() | |
1180 | * | |
1181 | * Perform all operations needed when a new clutch bucket becomes runnable. | |
1182 | * It involves inserting the clutch_bucket into the hierarchy and updating the | |
1183 | * root priority appropriately. | |
1184 | */ | |
1185 | static boolean_t | |
1186 | sched_clutch_bucket_runnable( | |
1187 | sched_clutch_bucket_t clutch_bucket, | |
1188 | sched_clutch_root_t root_clutch, | |
1189 | uint64_t timestamp, |
1190 | sched_clutch_bucket_options_t options) | |
1191 | { |
1192 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1193 | sched_clutch_bucket_cpu_blocked_update(clutch_bucket, timestamp); | |
1194 | clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp); | |
1195 | sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options);
1196 | /* Update the timesharing properties of this clutch_bucket; also done every sched_tick */ |
1197 | sched_clutch_bucket_timeshare_update(clutch_bucket); | |
1198 | int16_t root_old_pri = root_clutch->scr_priority; | |
1199 | sched_clutch_root_pri_update(root_clutch); | |
1200 | return root_clutch->scr_priority > root_old_pri; | |
1201 | } | |
1202 | ||
1203 | /* | |
1204 | * sched_clutch_bucket_update() | |
1205 | * | |
1206 | * Update the clutch_bucket's position in the hierarchy. This routine is |
1207 | * called when a new thread is inserted or removed from a runnable clutch | |
1208 | * bucket. The options specify some properties about the clutch bucket | |
1209 | * insertion order into the clutch bucket runq. | |
1210 | */ |
1211 | static boolean_t | |
1212 | sched_clutch_bucket_update( | |
1213 | sched_clutch_bucket_t clutch_bucket, | |
1214 | sched_clutch_root_t root_clutch, | |
1215 | uint64_t timestamp, |
1216 | sched_clutch_bucket_options_t options) | |
1217 | { |
1218 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1219 | uint64_t new_pri = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp); | |
1220 | sched_clutch_bucket_runq_t bucket_runq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;
1221 | if (new_pri == clutch_bucket->scb_priority) {
1222 | /* |
1223 | * If SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR is specified, move the clutch bucket | |
1224 | * to the end of the runq. Typically used when a thread is selected for execution | |
1225 | * from a clutch bucket. | |
1226 | */ | |
1227 | if (options & SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR) { | |
1228 | sched_clutch_bucket_runq_rotate(bucket_runq, clutch_bucket); | |
1229 | } | |
1230 | return false; |
1231 | } | |
1232 | sched_clutch_bucket_runq_remove(bucket_runq, clutch_bucket); |
1233 | clutch_bucket->scb_priority = new_pri; | |
1234 | sched_clutch_bucket_runq_enqueue(bucket_runq, clutch_bucket, options); | |
1235 | |
1236 | int16_t root_old_pri = root_clutch->scr_priority; | |
1237 | sched_clutch_root_pri_update(root_clutch); | |
1238 | return root_clutch->scr_priority > root_old_pri; | |
1239 | } | |
1240 | ||
1241 | /* | |
1242 | * sched_clutch_bucket_empty() | |
1243 | * | |
1244 | * Perform all the operations needed when a clutch_bucket is no longer runnable. | |
1245 | * It involves removing the clutch bucket from the hierarchy and updating the root | 
1246 | * priority appropriately. | |
1247 | */ | |
1248 | static void | |
1249 | sched_clutch_bucket_empty( | |
1250 | sched_clutch_bucket_t clutch_bucket, | |
1251 | sched_clutch_root_t root_clutch, | |
ea3f0419 A |
1252 | uint64_t timestamp, |
1253 | sched_clutch_bucket_options_t options) | |
cb323159 A |
1254 | { |
1255 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
ea3f0419 | 1256 | sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options); |
cb323159 A |
1257 | clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp); |
1258 | sched_clutch_root_pri_update(root_clutch); | |
1259 | } | |
1260 | ||
1261 | /* | |
1262 | * sched_clutch_cpu_usage_update() | |
1263 | * | |
1264 | * Routine to update CPU usage of the thread in the hierarchy. | |
1265 | */ | |
1266 | void | |
1267 | sched_clutch_cpu_usage_update( | |
1268 | thread_t thread, | |
1269 | uint64_t delta) | |
1270 | { | |
1271 | if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1272 | return; | |
1273 | } | |
1274 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1275 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
1276 | sched_clutch_bucket_cpu_usage_update(clutch_bucket, delta); | |
1277 | } | |
1278 | ||
1279 | /* | |
1280 | * sched_clutch_bucket_cpu_usage_update() | |
1281 | * | |
1282 | * Routine to update the CPU usage of the clutch_bucket. | |
1283 | */ | |
1284 | static void | |
1285 | sched_clutch_bucket_cpu_usage_update( | |
1286 | sched_clutch_bucket_t clutch_bucket, | |
1287 | uint64_t delta) | |
1288 | { | |
1289 | if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) { | |
1290 | /* Since the Above UI bucket always has the maximum interactivity score, nothing to do here */ | 
1291 | return; | |
1292 | } | |
1293 | ||
1294 | /* | |
1295 | * The CPU usage should not overflow the clutch_cpu_data_t type. Since the usage is used to | |
1296 | * calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX. | 
1297 | */ | |
1298 | delta = MIN(delta, CLUTCH_CPU_DATA_MAX); | |
1299 | os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_used), (clutch_cpu_data_t)delta, relaxed); | |
1300 | } | |
1301 | ||
1302 | /* | |
1303 | * sched_clutch_bucket_cpu_blocked_update() | |
1304 | * | |
1305 | * Routine to update CPU blocked time for clutch_bucket. | |
1306 | */ | |
1307 | static void | |
1308 | sched_clutch_bucket_cpu_blocked_update( | |
1309 | sched_clutch_bucket_t clutch_bucket, | |
1310 | uint64_t timestamp) | |
1311 | { | |
1312 | if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) || | |
1313 | (clutch_bucket->scb_blocked_ts == SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID)) { | |
1314 | /* For the Above UI bucket or a newly initialized clutch bucket, nothing to do here */ | 
1315 | return; | |
1316 | } | |
1317 | ||
1318 | uint64_t blocked_time = timestamp - clutch_bucket->scb_blocked_ts; | |
1319 | if (blocked_time > sched_clutch_bucket_adjust_threshold) { | |
1320 | blocked_time = sched_clutch_bucket_adjust_threshold; | |
1321 | } | |
1322 | ||
1323 | /* | |
1324 | * The CPU blocked time should not overflow the clutch_cpu_data_t type. Since the blocked time is used to | 
1325 | * calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX. | 
1326 | */ | |
1327 | blocked_time = MIN(blocked_time, CLUTCH_CPU_DATA_MAX); | |
1328 | clutch_cpu_data_t __assert_only cpu_blocked_orig = os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_blocked), (clutch_cpu_data_t)blocked_time, relaxed); | |
1329 | /* The blocked time is scaled every so often, so it should never overflow */ | 
1330 | assert(blocked_time <= (CLUTCH_CPU_DATA_MAX - cpu_blocked_orig)); | |
1331 | } | |
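/*
 * Note (interpretation): capping the blocked time at
 * sched_clutch_bucket_adjust_threshold keeps a clutch bucket that has been
 * blocked for a very long time from accumulating an outsized blocked-time
 * credit, and therefore an outsized interactivity boost, the moment it
 * becomes runnable again.
 */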
1332 | ||
1333 | /* | |
1334 | * sched_clutch_bucket_cpu_adjust() | |
1335 | * | |
1336 | * Routine to scale the cpu usage and blocked time once the sum gets bigger | |
1337 | * than sched_clutch_bucket_adjust_threshold. Allows the values to remain | |
1338 | * manageable and maintain the same ratio, so that changes in a clutch bucket's | 
1339 | * behavior are reflected in its interactivity score in a reasonable | 
1340 | * amount of time (see the worked example following this routine). | 
1341 | */ | |
1342 | static void | |
1343 | sched_clutch_bucket_cpu_adjust( | |
1344 | sched_clutch_bucket_t clutch_bucket) | |
1345 | { | |
1346 | sched_clutch_bucket_cpu_data_t old_cpu_data = {}; | |
1347 | sched_clutch_bucket_cpu_data_t new_cpu_data = {}; | |
1348 | do { | |
1349 | old_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed); | |
1350 | clutch_cpu_data_t cpu_used = old_cpu_data.cpu_data.scbcd_cpu_used; | |
1351 | clutch_cpu_data_t cpu_blocked = old_cpu_data.cpu_data.scbcd_cpu_blocked; | |
1352 | if ((cpu_used + cpu_blocked) < sched_clutch_bucket_adjust_threshold) { | |
1353 | return; | |
1354 | } | |
1355 | ||
1356 | /* | |
1357 | * The accumulation of CPU used and blocked is past the threshold; scale it | |
1358 | * down to lose old history. | |
1359 | */ | |
1360 | new_cpu_data.cpu_data.scbcd_cpu_used = cpu_used / SCHED_CLUTCH_BUCKET_ADJUST_RATIO; | |
1361 | new_cpu_data.cpu_data.scbcd_cpu_blocked = cpu_blocked / SCHED_CLUTCH_BUCKET_ADJUST_RATIO; | |
1362 | } while (!os_atomic_cmpxchg(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, old_cpu_data.scbcd_cpu_data_packed, new_cpu_data.scbcd_cpu_data_packed, relaxed)); | |
1363 | } | |
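/*
 * Worked example (hypothetical values): assume the adjust threshold is 8000,
 * SCHED_CLUTCH_BUCKET_ADJUST_RATIO is 10, scbcd_cpu_used is 6000 and
 * scbcd_cpu_blocked is 4000. The sum (10000) exceeds the threshold, so the
 * loop attempts to atomically replace the packed pair with 600/400. The
 * 60:40 used-to-blocked ratio (and hence the interactivity score derived
 * from it) is preserved while the absolute magnitudes, i.e. the old history,
 * are aged out. If another CPU updates the packed value between the load and
 * the cmpxchg, the loop simply re-reads and retries.
 */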
1364 | ||
1365 | /* | |
1366 | * Thread level scheduling algorithm | |
1367 | * | |
1368 | * The thread level scheduling algorithm uses the mach timeshare | |
1369 | * decay based algorithm to achieve sharing between threads within the | |
1370 | * same clutch bucket. The load/priority shifts etc. are all maintained | |
1371 | * at the clutch bucket level and used for decay calculation of the | |
1372 | * threads. The load sampling is still driven off the scheduler tick | |
1373 | * for runnable clutch buckets (it does not use the new higher frequency | |
1374 | * EWMA based load calculation). The idea is that the contention and load | |
1375 | * within a clutch_bucket should be low enough that its threads do not see | 
1376 | * heavy decay and can timeshare effectively. | 
1377 | */ | |
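/*
 * As a rough sketch of the decay relationship (standard Mach timeshare
 * behavior, simplified): the effective priority of a timeshare thread is
 * approximately
 *
 *     sched_pri ~= base_pri - (sched_usage >> pri_shift)
 *
 * where pri_shift comes from the thread's clutch bucket (see
 * sched_clutch_thread_pri_shift()). A lightly loaded clutch bucket produces
 * a larger pri_shift, so accumulated CPU usage is discounted more heavily
 * and the thread's priority decays less.
 */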
1378 | ||
1379 | /* | |
1380 | * sched_clutch_thread_run_bucket_incr() / sched_clutch_run_bucket_incr() | |
1381 | * | |
1382 | * Increment the run count for the clutch bucket associated with the | |
1383 | * thread. | |
1384 | */ | |
1385 | uint32_t | |
1386 | sched_clutch_thread_run_bucket_incr( | |
1387 | thread_t thread, | |
1388 | sched_bucket_t bucket) | |
1389 | { | |
1390 | if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1391 | return 0; | |
1392 | } | |
1393 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1394 | return sched_clutch_run_bucket_incr(clutch, bucket); | |
1395 | } | |
1396 | ||
1397 | static uint32_t | |
1398 | sched_clutch_run_bucket_incr( | |
1399 | sched_clutch_t clutch, | |
1400 | sched_bucket_t bucket) | |
1401 | { | |
1402 | assert(bucket != TH_BUCKET_RUN); | |
1403 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]); | |
1404 | uint32_t result = os_atomic_inc(&(clutch_bucket->scb_run_count), relaxed); | |
1405 | return result; | |
1406 | } | |
1407 | ||
1408 | /* | |
1409 | * sched_clutch_thread_run_bucket_decr() / sched_clutch_run_bucket_decr() | |
1410 | * | |
1411 | * Decrement the run count for the clutch bucket associated with the | |
1412 | * thread. | |
1413 | */ | |
1414 | uint32_t | |
1415 | sched_clutch_thread_run_bucket_decr( | |
1416 | thread_t thread, | |
1417 | sched_bucket_t bucket) | |
1418 | { | |
1419 | if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1420 | return 0; | |
1421 | } | |
1422 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1423 | return sched_clutch_run_bucket_decr(clutch, bucket); | |
1424 | } | |
1425 | ||
1426 | static uint32_t | |
1427 | sched_clutch_run_bucket_decr( | |
1428 | sched_clutch_t clutch, | |
1429 | sched_bucket_t bucket) | |
1430 | { | |
1431 | assert(bucket != TH_BUCKET_RUN); | |
1432 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]); | |
1433 | uint32_t result = os_atomic_dec(&(clutch_bucket->scb_run_count), relaxed); | |
1434 | return result; | |
1435 | } | |
1436 | ||
1437 | /* | |
1438 | * sched_clutch_bucket_timeshare_update() | |
1439 | * | |
1440 | * Routine to update the load and priority shift for the clutch_bucket every | |
1441 | * sched_tick. For runnable clutch_buckets, the sched tick handling code | |
1442 | * iterates the clutch buckets and calls this routine. For all others, the | |
1443 | * clutch_bucket maintains a "last updated schedtick" parameter. As threads | |
1444 | * become runnable in the clutch bucket, if this value is outdated, the load | |
1445 | * and shifts are updated. | |
1446 | * | |
1447 | * Possible optimization: | |
1448 | * - The current algorithm samples the load every sched tick (125ms). | |
1449 | * This is prone to spikes in runnable counts; if that turns out to be | |
1450 | * a problem, a simple solution would be to do the EWMA trick to sample | |
1451 | * load at every load_tick (30ms) and use the averaged value for the pri | |
1452 | * shift calculation. | |
1453 | */ | |
1454 | static void | |
1455 | sched_clutch_bucket_timeshare_update( | |
1456 | sched_clutch_bucket_t clutch_bucket) | |
1457 | { | |
1458 | if (clutch_bucket->scb_bucket < TH_BUCKET_SHARE_FG) { | |
1459 | return; | |
1460 | } | |
1461 | ||
1462 | /* | |
1463 | * Update the timeshare parameters for the clutch bucket if they haven't been updated | 
1464 | * in this tick. | |
1465 | */ | |
1466 | uint32_t bucket_sched_ts = os_atomic_load(&clutch_bucket->scb_timeshare_tick, relaxed); | |
1467 | uint32_t current_sched_ts = sched_tick; | |
1468 | if (bucket_sched_ts != current_sched_ts) { | |
1469 | os_atomic_store(&clutch_bucket->scb_timeshare_tick, current_sched_ts, relaxed); | |
1470 | uint32_t bucket_load = (os_atomic_load(&clutch_bucket->scb_run_count, relaxed) / processor_avail_count); | |
1471 | bucket_load = MIN(bucket_load, NRQS - 1); | |
1472 | uint32_t pri_shift = sched_fixed_shift - sched_load_shifts[bucket_load]; | |
1473 | os_atomic_store(&clutch_bucket->scb_pri_shift, pri_shift, relaxed); | |
1474 | } | |
1475 | } | |
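/*
 * Illustrative sketch of the EWMA-based sampling mentioned in the "Possible
 * optimization" note above. The scb_load_avg field and the 1/4 weighting
 * factor are hypothetical and not part of the current implementation:
 *
 *     uint32_t sample = os_atomic_load(&clutch_bucket->scb_run_count, relaxed) / processor_avail_count;
 *     uint32_t load_avg = clutch_bucket->scb_load_avg;          // hypothetical field
 *     load_avg = ((3 * load_avg) + sample) / 4;                 // EWMA with weight 1/4 on the new sample
 *     clutch_bucket->scb_load_avg = load_avg;
 *     uint32_t pri_shift = sched_fixed_shift - sched_load_shifts[MIN(load_avg, NRQS - 1)];
 *
 * Sampling at every load_tick and feeding the averaged load into the
 * pri_shift lookup would smooth out transient spikes in the runnable count.
 */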
1476 | ||
1477 | /* | |
1478 | * sched_clutch_thread_clutch_update() | |
1479 | * | |
1480 | * Routine called when the thread changes its thread group. The current | |
1481 | * implementation relies on the fact that the thread group is changed only | |
1482 | * from the context of the thread itself. Due to this fact, the thread | |
1483 | * group change causes only counter updates in the old & new clutch | |
1484 | * buckets and no hierarchy changes. The routine also attributes the CPU | |
1485 | * used so far to the old clutch. | |
1486 | */ | |
1487 | void | |
1488 | sched_clutch_thread_clutch_update( | |
1489 | thread_t thread, | |
1490 | sched_clutch_t old_clutch, | |
1491 | sched_clutch_t new_clutch) | |
1492 | { | |
1493 | uint32_t cpu_delta; | |
1494 | assert(current_thread() == thread); | |
1495 | ||
1496 | if (old_clutch) { | |
1497 | sched_clutch_run_bucket_decr(old_clutch, thread->th_sched_bucket); | |
1498 | /* | |
1499 | * Calculate the CPU used by this thread in the old bucket and | |
1500 | * add it to the old clutch bucket. This uses the same CPU usage | |
1501 | * logic as update_priority etc. | |
1502 | */ | |
1503 | thread_timer_delta(thread, cpu_delta); | |
1504 | if (thread->pri_shift < INT8_MAX) { | |
1505 | thread->sched_usage += cpu_delta; | |
1506 | } | |
1507 | thread->cpu_delta += cpu_delta; | |
1508 | sched_clutch_bucket_cpu_usage_update(&(old_clutch->sc_clutch_buckets[thread->th_sched_bucket]), cpu_delta); | |
1509 | } | |
1510 | ||
1511 | if (new_clutch) { | |
1512 | sched_clutch_run_bucket_incr(new_clutch, thread->th_sched_bucket); | |
1513 | } | |
1514 | } | |
1515 | ||
1516 | /* Thread Insertion/Removal/Selection routines */ | |
1517 | ||
1518 | /* | |
1519 | * sched_clutch_thread_insert() | |
1520 | * | |
1521 | * Routine to insert a thread into the sched clutch hierarchy. | |
1522 | * Update the counts at all levels of the hierarchy and insert the nodes | |
1523 | * as they become runnable. Always called with the pset lock held. | |
1524 | */ | |
1525 | static boolean_t | |
1526 | sched_clutch_thread_insert( | |
1527 | sched_clutch_root_t root_clutch, | |
1528 | thread_t thread, | |
1529 | integer_t options) | |
1530 | { | |
1531 | boolean_t result = FALSE; | |
1532 | ||
1533 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1534 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1535 | assert(thread->thread_group == clutch->sc_tg); | |
1536 | ||
1537 | uint64_t current_timestamp = mach_absolute_time(); | |
1538 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
1539 | assert((clutch_bucket->scb_root == NULL) || (clutch_bucket->scb_root == root_clutch)); | |
1540 | ||
1541 | /* Insert thread into the clutch_bucket runq using sched_pri */ | |
1542 | run_queue_enqueue(&clutch_bucket->scb_runq, thread, options); | |
1543 | /* Increment the urgency counter for the root if necessary */ | |
1544 | sched_clutch_root_urgency_inc(root_clutch, thread); | |
1545 | ||
1546 | /* Insert thread into clutch_bucket priority queue based on the promoted or base priority */ | |
1547 | priority_queue_insert(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link, | |
1548 | sched_thread_sched_pri_promoted(thread) ? thread->sched_pri : thread->base_pri, | |
1549 | PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE); | |
1550 | os_atomic_inc(&clutch->sc_thr_count, relaxed); | |
1551 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE, | |
1552 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_RUNNABLE, 0); | |
1553 | ||
ea3f0419 A |
1554 | /* Enqueue the clutch bucket into the hierarchy (if needed) and update properties; pick the insertion order based on thread options */ | 
1555 | sched_clutch_bucket_options_t scb_options = (options & SCHED_HEADQ) ? SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ : SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ; | |
cb323159 A |
1556 | if (clutch_bucket->scb_thr_count == 0) { |
1557 | sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count); | |
1558 | sched_clutch_thr_count_inc(&root_clutch->scr_thr_count); | |
ea3f0419 | 1559 | result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp, scb_options); |
cb323159 A |
1560 | } else { |
1561 | sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count); | |
1562 | sched_clutch_thr_count_inc(&root_clutch->scr_thr_count); | |
ea3f0419 | 1563 | result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, scb_options); |
cb323159 A |
1564 | } |
1565 | return result; | |
1566 | } | |
1567 | ||
1568 | /* | |
1569 | * sched_clutch_thread_remove() | |
1570 | * | |
1571 | * Routine to remove a thread from the sched clutch hierarchy. | |
1572 | * Update the counts at all levels of the hierarchy and remove the nodes | |
1573 | * as they become empty. Always called with the pset lock held. | |
1574 | */ | |
1575 | static void | |
1576 | sched_clutch_thread_remove( | |
1577 | sched_clutch_root_t root_clutch, | |
1578 | thread_t thread, | |
ea3f0419 A |
1579 | uint64_t current_timestamp, |
1580 | sched_clutch_bucket_options_t options) | |
cb323159 A |
1581 | { |
1582 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1583 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1584 | assert(thread->thread_group == clutch->sc_tg); | |
1585 | assert(thread->runq != PROCESSOR_NULL); | |
1586 | ||
1587 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
1588 | assert(clutch_bucket->scb_root == root_clutch); | |
1589 | ||
1590 | /* Decrement the urgency counter for the root if necessary */ | |
1591 | sched_clutch_root_urgency_dec(root_clutch, thread); | |
1592 | /* Remove thread from the clutch_bucket */ | |
1593 | run_queue_remove(&clutch_bucket->scb_runq, thread); | |
1594 | ||
1595 | priority_queue_remove(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link, | |
1596 | PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE); | |
1597 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE, | |
1598 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_EMPTY, 0); | |
1599 | ||
1600 | /* Update counts at various levels of the hierarchy */ | |
1601 | os_atomic_dec(&clutch->sc_thr_count, relaxed); | |
1602 | sched_clutch_thr_count_dec(&root_clutch->scr_thr_count); | |
1603 | sched_clutch_thr_count_dec(&clutch_bucket->scb_thr_count); | |
1604 | ||
1605 | /* Remove the clutch bucket from the hierarchy (if needed) and update properties */ | 
1606 | if (clutch_bucket->scb_thr_count == 0) { | |
ea3f0419 | 1607 | sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp, options); |
cb323159 | 1608 | } else { |
ea3f0419 | 1609 | sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, options); |
cb323159 A |
1610 | } |
1611 | } | |
1612 | ||
1613 | /* | |
1614 | * sched_clutch_thread_highest() | |
1615 | * | |
1616 | * Routine to find and remove the highest priority thread | |
1617 | * from the sched clutch hierarchy. The algorithm looks at the | |
1618 | * hierarchy for the most eligible runnable thread and calls | |
1619 | * sched_clutch_thread_remove(). Always called with the | |
1620 | * pset lock held. | |
1621 | */ | |
1622 | static thread_t | |
1623 | sched_clutch_thread_highest( | |
1624 | sched_clutch_root_t root_clutch) | |
1625 | { | |
1626 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1627 | uint64_t current_timestamp = mach_absolute_time(); | |
1628 | ||
1629 | /* Select the highest priority root bucket */ | |
1630 | sched_clutch_root_bucket_t root_bucket = sched_clutch_root_highest_root_bucket(root_clutch, current_timestamp); | |
1631 | if (root_bucket == NULL) { | |
1632 | return THREAD_NULL; | |
1633 | } | |
1634 | /* Since a thread is being picked from this root bucket, update its deadline */ | |
1635 | sched_clutch_root_bucket_deadline_update(root_bucket, root_clutch, current_timestamp); | |
1636 | ||
1637 | /* Find the highest priority clutch bucket in this root bucket */ | |
1638 | sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket); | |
1639 | assert(clutch_bucket != NULL); | |
1640 | ||
1641 | /* Find the highest priority runnable thread in this clutch bucket */ | |
1642 | thread_t thread = run_queue_peek(&clutch_bucket->scb_runq); | |
1643 | assert(thread != NULL); | |
1644 | ||
ea3f0419 A |
1645 | /* Remove and return the thread from the hierarchy; also round robin the clutch bucket if the priority remains unchanged */ |
1646 | sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR); | |
cb323159 A |
1647 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_SELECT) | DBG_FUNC_NONE, |
1648 | thread_tid(thread), thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, 0, 0); | |
1649 | return thread; | |
1650 | } | |
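/*
 * Selection is therefore three-level (illustrative summary): the root bucket
 * (scheduling bucket/QoS level) is chosen first, then the highest priority
 * clutch bucket (thread group) within that root bucket, and finally the
 * highest sched_pri thread within that clutch bucket; the SAMEPRI_RR option
 * then rotates the clutch bucket behind any equal-priority peers so thread
 * groups at the same level share the CPU round-robin.
 */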
1651 | ||
1652 | ||
1653 | /* High level global accessor routines */ | |
1654 | ||
1655 | /* | |
1656 | * sched_clutch_root_urgency() | |
1657 | * | |
1658 | * Routine to get the urgency of the highest runnable | |
1659 | * thread in the hierarchy. | |
1660 | */ | |
1661 | static uint32_t | |
1662 | sched_clutch_root_urgency( | |
1663 | sched_clutch_root_t root_clutch) | |
1664 | { | |
1665 | return root_clutch->scr_urgency; | |
1666 | } | |
1667 | ||
1668 | /* | |
1669 | * sched_clutch_root_count_sum() | |
1670 | * | |
1671 | * The count_sum mechanism is used for scheduler runq | |
1672 | * statistics calculation. It is only useful for debugging | 
1673 | * purposes; since it takes a mach_absolute_time() on | 
1674 | * other scheduler implementations, it is better to avoid | 
1675 | * populating this until absolutely necessary. | |
1676 | */ | |
1677 | static uint32_t | |
1678 | sched_clutch_root_count_sum( | |
1679 | __unused sched_clutch_root_t root_clutch) | |
1680 | { | |
1681 | return 0; | |
1682 | } | |
1683 | ||
1684 | /* | |
1685 | * sched_clutch_root_priority() | |
1686 | * | |
1687 | * Routine to get the priority of the highest runnable | |
1688 | * thread in the hierarchy. | |
1689 | */ | |
1690 | static int | |
1691 | sched_clutch_root_priority( | |
1692 | sched_clutch_root_t root_clutch) | |
1693 | { | |
1694 | return root_clutch->scr_priority; | |
1695 | } | |
1696 | ||
1697 | /* | |
1698 | * sched_clutch_root_count() | |
1699 | * | |
1700 | * Returns total number of runnable threads in the hierarchy. | |
1701 | */ | |
1702 | uint32_t | |
1703 | sched_clutch_root_count( | |
1704 | sched_clutch_root_t root_clutch) | |
1705 | { | |
1706 | return root_clutch->scr_thr_count; | |
1707 | } | |
1708 | ||
1709 | /* | |
1710 | * sched_clutch_thread_pri_shift() | |
1711 | * | |
1712 | * Routine to get the priority shift value for a thread. | |
1713 | * Since the timesharing is done at the clutch_bucket level, | |
1714 | * this routine gets the clutch_bucket and retrieves the | |
1715 | * values from there. | |
1716 | */ | |
1717 | uint32_t | |
1718 | sched_clutch_thread_pri_shift( | |
1719 | thread_t thread, | |
1720 | sched_bucket_t bucket) | |
1721 | { | |
1722 | if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1723 | return UINT8_MAX; | |
1724 | } | |
1725 | assert(bucket != TH_BUCKET_RUN); | |
1726 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1727 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]); | |
1728 | return os_atomic_load(&clutch_bucket->scb_pri_shift, relaxed); | |
1729 | } | |
1730 | ||
1731 | #pragma mark -- Clutch Scheduler Algorithm | |
1732 | ||
1733 | static void | |
1734 | sched_clutch_init(void); | |
1735 | ||
1736 | static void | |
1737 | sched_clutch_timebase_init(void); | |
1738 | ||
1739 | static thread_t | |
1740 | sched_clutch_steal_thread(processor_set_t pset); | |
1741 | ||
1742 | static void | |
1743 | sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context); | |
1744 | ||
1745 | static boolean_t | |
1746 | sched_clutch_processor_enqueue(processor_t processor, thread_t thread, | |
1747 | sched_options_t options); | |
1748 | ||
1749 | static boolean_t | |
1750 | sched_clutch_processor_queue_remove(processor_t processor, thread_t thread); | |
1751 | ||
1752 | static ast_t | |
1753 | sched_clutch_processor_csw_check(processor_t processor); | |
1754 | ||
1755 | static boolean_t | |
1756 | sched_clutch_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte); | |
1757 | ||
1758 | static int | |
1759 | sched_clutch_runq_count(processor_t processor); | |
1760 | ||
1761 | static boolean_t | |
1762 | sched_clutch_processor_queue_empty(processor_t processor); | |
1763 | ||
1764 | static uint64_t | |
1765 | sched_clutch_runq_stats_count_sum(processor_t processor); | |
1766 | ||
1767 | static int | |
1768 | sched_clutch_processor_bound_count(processor_t processor); | |
1769 | ||
1770 | static void | |
1771 | sched_clutch_pset_init(processor_set_t pset); | |
1772 | ||
1773 | static void | |
1774 | sched_clutch_processor_init(processor_t processor); | |
1775 | ||
1776 | static thread_t | |
1777 | sched_clutch_choose_thread(processor_t processor, int priority, ast_t reason); | |
1778 | ||
1779 | static void | |
1780 | sched_clutch_processor_queue_shutdown(processor_t processor); | |
1781 | ||
1782 | static sched_mode_t | |
1783 | sched_clutch_initial_thread_sched_mode(task_t parent_task); | |
1784 | ||
1785 | static uint32_t | |
1786 | sched_clutch_initial_quantum_size(thread_t thread); | |
1787 | ||
1788 | static bool | |
1789 | sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread); | |
1790 | ||
1791 | static uint32_t | |
1792 | sched_clutch_run_incr(thread_t thread); | |
1793 | ||
1794 | static uint32_t | |
1795 | sched_clutch_run_decr(thread_t thread); | |
1796 | ||
1797 | static void | |
1798 | sched_clutch_update_thread_bucket(thread_t thread); | |
1799 | ||
1800 | const struct sched_dispatch_table sched_clutch_dispatch = { | |
1801 | .sched_name = "clutch", | |
1802 | .init = sched_clutch_init, | |
1803 | .timebase_init = sched_clutch_timebase_init, | |
1804 | .processor_init = sched_clutch_processor_init, | |
1805 | .pset_init = sched_clutch_pset_init, | |
1806 | .maintenance_continuation = sched_timeshare_maintenance_continue, | |
1807 | .choose_thread = sched_clutch_choose_thread, | |
1808 | .steal_thread_enabled = sched_steal_thread_enabled, | |
1809 | .steal_thread = sched_clutch_steal_thread, | |
1810 | .compute_timeshare_priority = sched_compute_timeshare_priority, | |
1811 | .choose_processor = choose_processor, | |
1812 | .processor_enqueue = sched_clutch_processor_enqueue, | |
1813 | .processor_queue_shutdown = sched_clutch_processor_queue_shutdown, | |
1814 | .processor_queue_remove = sched_clutch_processor_queue_remove, | |
1815 | .processor_queue_empty = sched_clutch_processor_queue_empty, | |
1816 | .priority_is_urgent = priority_is_urgent, | |
1817 | .processor_csw_check = sched_clutch_processor_csw_check, | |
1818 | .processor_queue_has_priority = sched_clutch_processor_queue_has_priority, | |
1819 | .initial_quantum_size = sched_clutch_initial_quantum_size, | |
1820 | .initial_thread_sched_mode = sched_clutch_initial_thread_sched_mode, | |
1821 | .can_update_priority = can_update_priority, | |
1822 | .update_priority = update_priority, | |
1823 | .lightweight_update_priority = lightweight_update_priority, | |
1824 | .quantum_expire = sched_default_quantum_expire, | |
1825 | .processor_runq_count = sched_clutch_runq_count, | |
1826 | .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum, | |
1827 | .processor_bound_count = sched_clutch_processor_bound_count, | |
1828 | .thread_update_scan = sched_clutch_thread_update_scan, | |
1829 | .multiple_psets_enabled = TRUE, | |
1830 | .sched_groups_enabled = FALSE, | |
1831 | .avoid_processor_enabled = TRUE, | |
1832 | .thread_avoid_processor = sched_clutch_thread_avoid_processor, | |
1833 | .processor_balance = sched_SMT_balance, | |
1834 | ||
1835 | .rt_runq = sched_rtglobal_runq, | |
1836 | .rt_init = sched_rtglobal_init, | |
1837 | .rt_queue_shutdown = sched_rtglobal_queue_shutdown, | |
1838 | .rt_runq_scan = sched_rtglobal_runq_scan, | |
1839 | .rt_runq_count_sum = sched_rtglobal_runq_count_sum, | |
1840 | ||
1841 | .qos_max_parallelism = sched_qos_max_parallelism, | |
1842 | .check_spill = sched_check_spill, | |
1843 | .ipi_policy = sched_ipi_policy, | |
1844 | .thread_should_yield = sched_thread_should_yield, | |
1845 | .run_count_incr = sched_clutch_run_incr, | |
1846 | .run_count_decr = sched_clutch_run_decr, | |
1847 | .update_thread_bucket = sched_clutch_update_thread_bucket, | |
1848 | .pset_made_schedulable = sched_pset_made_schedulable, | |
1849 | }; | |
1850 | ||
1851 | __attribute__((always_inline)) | |
1852 | static inline run_queue_t | |
1853 | sched_clutch_bound_runq(processor_t processor) | |
1854 | { | |
1855 | return &processor->runq; | |
1856 | } | |
1857 | ||
1858 | __attribute__((always_inline)) | |
1859 | static inline sched_clutch_root_t | |
1860 | sched_clutch_processor_root_clutch(processor_t processor) | |
1861 | { | |
1862 | return &processor->processor_set->pset_clutch_root; | |
1863 | } | |
1864 | ||
1865 | __attribute__((always_inline)) | |
1866 | static inline run_queue_t | |
1867 | sched_clutch_thread_bound_runq(processor_t processor, __assert_only thread_t thread) | |
1868 | { | |
1869 | assert(thread->bound_processor == processor); | |
1870 | return sched_clutch_bound_runq(processor); | |
1871 | } | |
1872 | ||
1873 | static uint32_t | |
1874 | sched_clutch_initial_quantum_size(thread_t thread) | |
1875 | { | |
1876 | if (thread == THREAD_NULL) { | |
1877 | return std_quantum; | |
1878 | } | |
1879 | assert(sched_clutch_thread_quantum[thread->th_sched_bucket] <= UINT32_MAX); | |
1880 | return (uint32_t)sched_clutch_thread_quantum[thread->th_sched_bucket]; | |
1881 | } | |
1882 | ||
1883 | static sched_mode_t | |
1884 | sched_clutch_initial_thread_sched_mode(task_t parent_task) | |
1885 | { | |
1886 | if (parent_task == kernel_task) { | |
1887 | return TH_MODE_FIXED; | |
1888 | } else { | |
1889 | return TH_MODE_TIMESHARE; | |
1890 | } | |
1891 | } | |
1892 | ||
1893 | static void | |
1894 | sched_clutch_processor_init(processor_t processor) | |
1895 | { | |
1896 | run_queue_init(&processor->runq); | |
1897 | } | |
1898 | ||
1899 | static void | |
1900 | sched_clutch_pset_init(processor_set_t pset) | |
1901 | { | |
1902 | sched_clutch_root_init(&pset->pset_clutch_root, pset); | |
1903 | } | |
1904 | ||
1905 | static void | |
1906 | sched_clutch_init(void) | |
1907 | { | |
1908 | if (!PE_parse_boot_argn("sched_clutch_bucket_interactive_pri", &sched_clutch_bucket_interactive_pri, sizeof(sched_clutch_bucket_interactive_pri))) { | |
1909 | sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT; | |
1910 | } | |
1911 | if (!PE_parse_boot_argn("sched_clutch_bucket_pri_boost", &sched_clutch_bucket_pri_boost, sizeof(sched_clutch_bucket_pri_boost))) { | |
1912 | sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT; | |
1913 | } | |
1914 | sched_timeshare_init(); | |
1915 | } | |
1916 | ||
1917 | static void | |
1918 | sched_clutch_timebase_init(void) | |
1919 | { | |
1920 | sched_timeshare_timebase_init(); | |
1921 | sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel); | |
1922 | sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp); | |
1923 | sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum); | |
1924 | clock_interval_to_absolutetime_interval(SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS, | |
1925 | NSEC_PER_USEC, &sched_clutch_bucket_adjust_threshold); | |
1926 | ||
1927 | uint32_t interactivity_delta = 0; | |
1928 | if (!PE_parse_boot_argn("sched_clutch_bucket_interactivity_delta_usecs", &interactivity_delta, sizeof(interactivity_delta))) { | |
1929 | interactivity_delta = SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT; | |
1930 | } | |
1931 | clock_interval_to_absolutetime_interval(interactivity_delta, NSEC_PER_USEC, &sched_clutch_bucket_interactivity_delta); | |
1932 | } | |
1933 | ||
1934 | static thread_t | |
1935 | sched_clutch_choose_thread( | |
1936 | processor_t processor, | |
1937 | int priority, | |
1938 | __unused ast_t reason) | |
1939 | { | |
1940 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
1941 | uint32_t clutch_count = sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)); | |
1942 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
1943 | boolean_t choose_from_boundq = false; | |
1944 | ||
1945 | if (bound_runq->highq < priority && | |
1946 | clutch_pri < priority) { | |
1947 | return THREAD_NULL; | |
1948 | } | |
1949 | ||
1950 | if (bound_runq->count && clutch_count) { | |
1951 | if (bound_runq->highq >= clutch_pri) { | |
1952 | choose_from_boundq = true; | |
1953 | } | |
1954 | } else if (bound_runq->count) { | |
1955 | choose_from_boundq = true; | |
1956 | } else if (clutch_count) { | |
1957 | choose_from_boundq = false; | |
1958 | } else { | |
1959 | return THREAD_NULL; | |
1960 | } | |
1961 | ||
1962 | thread_t thread = THREAD_NULL; | |
1963 | if (choose_from_boundq == false) { | |
1964 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1965 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
1966 | } else { | |
1967 | thread = run_queue_dequeue(bound_runq, SCHED_HEADQ); | |
1968 | } | |
1969 | return thread; | |
1970 | } | |
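/*
 * Example (illustrative): if the bound runq's highest priority is 60 and the
 * clutch hierarchy's root priority is 47, the bound thread is dequeued; on a
 * tie the bound runq is also preferred (the comparison above is >=).
 */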
1971 | ||
1972 | static boolean_t | |
1973 | sched_clutch_processor_enqueue( | |
1974 | processor_t processor, | |
1975 | thread_t thread, | |
1976 | sched_options_t options) | |
1977 | { | |
1978 | boolean_t result; | |
1979 | ||
1980 | thread->runq = processor; | |
1981 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1982 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1983 | result = sched_clutch_thread_insert(pset_clutch_root, thread, options); | |
1984 | } else { | |
1985 | run_queue_t rq = sched_clutch_thread_bound_runq(processor, thread); | |
1986 | result = run_queue_enqueue(rq, thread, options); | |
1987 | } | |
1988 | return result; | |
1989 | } | |
1990 | ||
1991 | static boolean_t | |
1992 | sched_clutch_processor_queue_empty(processor_t processor) | |
1993 | { | |
1994 | return sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0 && | |
1995 | sched_clutch_bound_runq(processor)->count == 0; | |
1996 | } | |
1997 | ||
1998 | static ast_t | |
1999 | sched_clutch_processor_csw_check(processor_t processor) | |
2000 | { | |
2001 | boolean_t has_higher; | |
2002 | int pri; | |
2003 | ||
2004 | if (sched_clutch_thread_avoid_processor(processor, current_thread())) { | |
2005 | return AST_PREEMPT | AST_URGENT; | |
2006 | } | |
2007 | ||
2008 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2009 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
2010 | ||
2011 | assert(processor->active_thread != NULL); | |
2012 | ||
2013 | pri = MAX(clutch_pri, bound_runq->highq); | |
2014 | ||
2015 | if (processor->first_timeslice) { | |
2016 | has_higher = (pri > processor->current_pri); | |
2017 | } else { | |
2018 | has_higher = (pri >= processor->current_pri); | |
2019 | } | |
2020 | ||
2021 | if (has_higher) { | |
2022 | if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) { | |
2023 | return AST_PREEMPT | AST_URGENT; | |
2024 | } | |
2025 | ||
2026 | if (bound_runq->urgency > 0) { | |
2027 | return AST_PREEMPT | AST_URGENT; | |
2028 | } | |
2029 | ||
2030 | return AST_PREEMPT; | |
2031 | } | |
2032 | ||
2033 | return AST_NONE; | |
2034 | } | |
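/*
 * Note on the first_timeslice check above: while the running thread is still
 * in its first timeslice, only a strictly higher priority candidate triggers
 * preemption; once the first timeslice has been consumed, an equal priority
 * candidate is enough, which lets same-priority threads round-robin at
 * quantum boundaries.
 */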
2035 | ||
2036 | static boolean_t | |
2037 | sched_clutch_processor_queue_has_priority(processor_t processor, | |
2038 | int priority, | |
2039 | boolean_t gte) | |
2040 | { | |
2041 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2042 | ||
2043 | int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq); | |
2044 | ||
2045 | if (gte) { | |
2046 | return qpri >= priority; | |
2047 | } else { | |
2048 | return qpri > priority; | |
2049 | } | |
2050 | } | |
2051 | ||
2052 | static int | |
2053 | sched_clutch_runq_count(processor_t processor) | |
2054 | { | |
2055 | return (int)sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) + sched_clutch_bound_runq(processor)->count; | |
2056 | } | |
2057 | ||
2058 | static uint64_t | |
2059 | sched_clutch_runq_stats_count_sum(processor_t processor) | |
2060 | { | |
2061 | uint64_t bound_sum = sched_clutch_bound_runq(processor)->runq_stats.count_sum; | |
2062 | ||
2063 | if (processor->cpu_id == processor->processor_set->cpu_set_low) { | |
2064 | return bound_sum + sched_clutch_root_count_sum(sched_clutch_processor_root_clutch(processor)); | |
2065 | } else { | |
2066 | return bound_sum; | |
2067 | } | |
2068 | } | |
2069 | static int | |
2070 | sched_clutch_processor_bound_count(processor_t processor) | |
2071 | { | |
2072 | return sched_clutch_bound_runq(processor)->count; | |
2073 | } | |
2074 | ||
2075 | static void | |
2076 | sched_clutch_processor_queue_shutdown(processor_t processor) | |
2077 | { | |
2078 | processor_set_t pset = processor->processor_set; | |
2079 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
2080 | thread_t thread; | |
2081 | queue_head_t tqueue; | |
2082 | ||
2083 | /* We only need to migrate threads if this is the last active processor in the pset */ | |
2084 | if (pset->online_processor_count > 0) { | |
2085 | pset_unlock(pset); | |
2086 | return; | |
2087 | } | |
2088 | ||
2089 | queue_init(&tqueue); | |
2090 | while (sched_clutch_root_count(pset_clutch_root) > 0) { | |
2091 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
2092 | enqueue_tail(&tqueue, &thread->runq_links); | |
2093 | } | |
2094 | ||
2095 | pset_unlock(pset); | |
2096 | ||
2097 | qe_foreach_element_safe(thread, &tqueue, runq_links) { | |
2098 | remqueue(&thread->runq_links); | |
2099 | ||
2100 | thread_lock(thread); | |
2101 | ||
2102 | thread_setrun(thread, SCHED_TAILQ); | |
2103 | ||
2104 | thread_unlock(thread); | |
2105 | } | |
2106 | } | |
2107 | ||
2108 | static boolean_t | |
2109 | sched_clutch_processor_queue_remove( | |
2110 | processor_t processor, | |
2111 | thread_t thread) | |
2112 | { | |
2113 | run_queue_t rq; | |
2114 | processor_set_t pset = processor->processor_set; | |
2115 | ||
2116 | pset_lock(pset); | |
2117 | ||
2118 | if (processor == thread->runq) { | |
2119 | /* | |
2120 | * Thread is on a run queue and we have a lock on | |
2121 | * that run queue. | |
2122 | */ | |
2123 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
2124 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
ea3f0419 | 2125 | sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time(), SCHED_CLUTCH_BUCKET_OPTIONS_NONE); |
cb323159 A |
2126 | } else { |
2127 | rq = sched_clutch_thread_bound_runq(processor, thread); | |
2128 | run_queue_remove(rq, thread); | |
2129 | } | |
2130 | } else { | |
2131 | /* | |
2132 | * The thread left the run queue before we could | |
2133 | * lock the run queue. | |
2134 | */ | |
2135 | assert(thread->runq == PROCESSOR_NULL); | |
2136 | processor = PROCESSOR_NULL; | |
2137 | } | |
2138 | ||
2139 | pset_unlock(pset); | |
2140 | ||
2141 | return processor != PROCESSOR_NULL; | |
2142 | } | |
2143 | ||
2144 | static thread_t | |
2145 | sched_clutch_steal_thread(processor_set_t pset) | |
2146 | { | |
2147 | processor_set_t nset, cset = pset; | |
2148 | thread_t thread; | |
2149 | ||
2150 | do { | |
2151 | sched_clutch_root_t pset_clutch_root = &cset->pset_clutch_root; | |
2152 | if (sched_clutch_root_count(pset_clutch_root) > 0) { | |
2153 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
2154 | pset_unlock(cset); | |
2155 | return thread; | |
2156 | } | |
2157 | ||
2158 | nset = next_pset(cset); | |
2159 | ||
2160 | if (nset != pset) { | |
2161 | pset_unlock(cset); | |
2162 | ||
2163 | cset = nset; | |
2164 | pset_lock(cset); | |
2165 | } | |
2166 | } while (nset != pset); | |
2167 | ||
2168 | pset_unlock(cset); | |
2169 | ||
2170 | return THREAD_NULL; | |
2171 | } | |
2172 | ||
2173 | static void | |
2174 | sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context) | |
2175 | { | |
2176 | boolean_t restart_needed = FALSE; | |
2177 | processor_t processor = processor_list; | |
2178 | processor_set_t pset; | |
2179 | thread_t thread; | |
2180 | spl_t s; | |
2181 | ||
2182 | /* | |
2183 | * We update the threads associated with each processor (bound and idle threads) | |
2184 | * and then update the threads in each pset runqueue. | |
2185 | */ | |
2186 | ||
2187 | do { | |
2188 | do { | |
2189 | pset = processor->processor_set; | |
2190 | ||
2191 | s = splsched(); | |
2192 | pset_lock(pset); | |
2193 | ||
2194 | restart_needed = runq_scan(sched_clutch_bound_runq(processor), scan_context); | |
2195 | ||
2196 | pset_unlock(pset); | |
2197 | splx(s); | |
2198 | ||
2199 | if (restart_needed) { | |
2200 | break; | |
2201 | } | |
2202 | ||
2203 | thread = processor->idle_thread; | |
2204 | if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { | |
2205 | if (thread_update_add_thread(thread) == FALSE) { | |
2206 | restart_needed = TRUE; | |
2207 | break; | |
2208 | } | |
2209 | } | |
2210 | } while ((processor = processor->processor_list) != NULL); | |
2211 | ||
2212 | /* Ok, we now have a collection of candidates -- fix them. */ | |
2213 | thread_update_process_threads(); | |
2214 | } while (restart_needed); | |
2215 | ||
2216 | pset = &pset0; | |
2217 | ||
2218 | do { | |
2219 | do { | |
2220 | s = splsched(); | |
2221 | pset_lock(pset); | |
2222 | ||
2223 | if (sched_clutch_root_count(&pset->pset_clutch_root) > 0) { | |
2224 | queue_t clutch_bucket_list = &pset->pset_clutch_root.scr_clutch_buckets; | |
2225 | sched_clutch_bucket_t clutch_bucket; | |
2226 | qe_foreach_element(clutch_bucket, clutch_bucket_list, scb_listlink) { | |
2227 | sched_clutch_bucket_timeshare_update(clutch_bucket); | |
2228 | restart_needed = runq_scan(&clutch_bucket->scb_runq, scan_context); | |
2229 | if (restart_needed) { | |
2230 | break; | |
2231 | } | |
2232 | } | |
2233 | } | |
2234 | ||
2235 | pset_unlock(pset); | |
2236 | splx(s); | |
2237 | if (restart_needed) { | |
2238 | break; | |
2239 | } | |
2240 | } while ((pset = pset->pset_list) != NULL); | |
2241 | ||
2242 | /* Ok, we now have a collection of candidates -- fix them. */ | |
2243 | thread_update_process_threads(); | |
2244 | } while (restart_needed); | |
2245 | } | |
2246 | ||
2247 | extern int sched_allow_rt_smt; | |
2248 | ||
2249 | /* Return true if this thread should not continue running on this processor */ | |
2250 | static bool | |
2251 | sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread) | |
2252 | { | |
2253 | if (processor->processor_primary != processor) { | |
2254 | /* | |
2255 | * This is a secondary SMT processor. If the primary is running | |
2256 | * a realtime thread, only allow realtime threads on the secondary. | |
2257 | */ | |
2258 | if ((processor->processor_primary->current_pri >= BASEPRI_RTQUEUES) && ((thread->sched_pri < BASEPRI_RTQUEUES) || !sched_allow_rt_smt)) { | |
2259 | return true; | |
2260 | } | |
2261 | } | |
2262 | ||
2263 | return false; | |
2264 | } | |
2265 | ||
2266 | /* | |
2267 | * For the clutch scheduler, the run counts are maintained in the clutch | |
2268 | * buckets (i.e. the thread group scheduling structure). | 
2269 | */ | |
2270 | static uint32_t | |
2271 | sched_clutch_run_incr(thread_t thread) | |
2272 | { | |
2273 | assert((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN); | |
2274 | uint32_t new_count = os_atomic_inc(&sched_run_buckets[TH_BUCKET_RUN], relaxed); | |
2275 | sched_clutch_thread_run_bucket_incr(thread, thread->th_sched_bucket); | |
2276 | return new_count; | |
2277 | } | |
2278 | ||
2279 | static uint32_t | |
2280 | sched_clutch_run_decr(thread_t thread) | |
2281 | { | |
2282 | assert((thread->state & (TH_RUN | TH_IDLE)) != TH_RUN); | |
2283 | uint32_t new_count = os_atomic_dec(&sched_run_buckets[TH_BUCKET_RUN], relaxed); | |
2284 | sched_clutch_thread_run_bucket_decr(thread, thread->th_sched_bucket); | |
2285 | return new_count; | |
2286 | } | |
2287 | ||
2288 | static sched_bucket_t | |
2289 | sched_convert_pri_to_bucket(uint8_t priority) | |
2290 | { | |
2291 | sched_bucket_t bucket = TH_BUCKET_RUN; | |
2292 | ||
2293 | if (priority > BASEPRI_USER_INITIATED) { | |
2294 | bucket = TH_BUCKET_SHARE_FG; | |
2295 | } else if (priority > BASEPRI_DEFAULT) { | |
2296 | bucket = TH_BUCKET_SHARE_IN; | |
2297 | } else if (priority > BASEPRI_UTILITY) { | |
2298 | bucket = TH_BUCKET_SHARE_DF; | |
2299 | } else if (priority > MAXPRI_THROTTLE) { | |
2300 | bucket = TH_BUCKET_SHARE_UT; | |
2301 | } else { | |
2302 | bucket = TH_BUCKET_SHARE_BG; | |
2303 | } | |
2304 | return bucket; | |
2305 | } | |
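/*
 * Illustrative mapping, assuming the usual XNU base priority values
 * (BASEPRI_USER_INITIATED == 37, BASEPRI_DEFAULT == 31, BASEPRI_UTILITY == 20,
 * MAXPRI_THROTTLE == 4; the authoritative values live in the priority
 * headers):
 *
 *     pri >= 38   -> TH_BUCKET_SHARE_FG
 *     pri 32..37  -> TH_BUCKET_SHARE_IN
 *     pri 21..31  -> TH_BUCKET_SHARE_DF
 *     pri  5..20  -> TH_BUCKET_SHARE_UT
 *     pri <=  4   -> TH_BUCKET_SHARE_BG
 */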
2306 | ||
2307 | /* | |
2308 | * For threads that have changed sched_pri without changing the | |
2309 | * base_pri for any reason other than decay, use the sched_pri | |
2310 | * as the bucketizing priority instead of base_pri. All such | |
2311 | * changes are typically due to kernel locking primitive boosts | 
2312 | * or demotions. | |
2313 | */ | |
2314 | static boolean_t | |
2315 | sched_thread_sched_pri_promoted(thread_t thread) | |
2316 | { | |
2317 | return (thread->sched_flags & TH_SFLAG_PROMOTED) || | |
2318 | (thread->sched_flags & TH_SFLAG_PROMOTE_REASON_MASK) || | |
2319 | (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) || | |
2320 | (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || | |
2321 | (thread->kern_promotion_schedpri != 0); | |
2322 | } | |
2323 | ||
2324 | /* | |
2325 | * Routine to update the scheduling bucket for the thread. | |
2326 | * | |
2327 | * In the clutch scheduler implementation, the thread's bucket | |
2328 | * is based on sched_pri if it was promoted due to a kernel | |
2329 | * primitive; otherwise it is based on the thread's base_pri. This | 
2330 | * enhancement allows promoted threads to reach a higher priority | |
2331 | * bucket and potentially get selected sooner for scheduling. | |
2332 | * | |
2333 | * Also, the clutch scheduler does not honor fixed priority below | |
2334 | * FG priority. It simply puts those threads in the corresponding | |
2335 | * timeshare bucket. The reason for doing that is that it is | 
2336 | * extremely hard to define the scheduling properties of such threads | |
2337 | * and they typically lead to performance issues. | |
2338 | */ | |
2339 | ||
2340 | void | |
2341 | sched_clutch_update_thread_bucket(thread_t thread) | |
2342 | { | |
2343 | sched_bucket_t old_bucket = thread->th_sched_bucket; | |
2344 | sched_bucket_t new_bucket = TH_BUCKET_RUN; | |
2345 | assert(thread->runq == PROCESSOR_NULL); | |
2346 | ||
2347 | int pri = (sched_thread_sched_pri_promoted(thread)) ? thread->sched_pri : thread->base_pri; | |
2348 | ||
2349 | switch (thread->sched_mode) { | |
2350 | case TH_MODE_FIXED: | |
2351 | if (pri >= BASEPRI_FOREGROUND) { | |
2352 | new_bucket = TH_BUCKET_FIXPRI; | |
2353 | } else { | |
2354 | new_bucket = sched_convert_pri_to_bucket(pri); | |
2355 | } | |
2356 | break; | |
2357 | ||
2358 | case TH_MODE_REALTIME: | |
2359 | new_bucket = TH_BUCKET_FIXPRI; | |
2360 | break; | |
2361 | ||
2362 | case TH_MODE_TIMESHARE: | |
2363 | new_bucket = sched_convert_pri_to_bucket(pri); | |
2364 | break; | |
2365 | ||
2366 | default: | |
2367 | panic("unexpected mode: %d", thread->sched_mode); | |
2368 | break; | |
2369 | } | |
2370 | ||
2371 | if (old_bucket == new_bucket) { | |
2372 | return; | |
2373 | } | |
2374 | ||
2375 | thread->th_sched_bucket = new_bucket; | |
2376 | thread->pri_shift = sched_clutch_thread_pri_shift(thread, new_bucket); | |
2377 | ||
2378 | /* | |
2379 | * Since this is called after the thread has been removed from the runq, | |
2380 | * only the run counts need to be updated. The re-insert into the runq | |
2381 | * would put the thread into the correct new bucket's runq. | |
2382 | */ | |
2383 | if ((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN) { | |
2384 | sched_clutch_thread_run_bucket_decr(thread, old_bucket); | |
2385 | sched_clutch_thread_run_bucket_incr(thread, new_bucket); | |
2386 | } | |
2387 | } | |
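/*
 * Example (illustrative): a TH_MODE_TIMESHARE thread with base_pri 31
 * normally lands in TH_BUCKET_SHARE_DF. If a kernel locking primitive
 * promotes its sched_pri to, say, 80, sched_thread_sched_pri_promoted()
 * returns true and the bucketizing priority becomes 80, moving the thread to
 * TH_BUCKET_SHARE_FG until the promotion is dropped.
 */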
2388 | ||
c6bf4f31 A |
2389 | #if __AMP__ |
2390 | ||
2391 | /* Implementation of the AMP version of the clutch scheduler */ | |
2392 | ||
2393 | static thread_t | |
2394 | sched_clutch_amp_steal_thread(processor_set_t pset); | |
2395 | ||
2396 | static ast_t | |
2397 | sched_clutch_amp_processor_csw_check(processor_t processor); | |
2398 | ||
2399 | static boolean_t | |
2400 | sched_clutch_amp_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte); | |
2401 | ||
2402 | static boolean_t | |
2403 | sched_clutch_amp_processor_queue_empty(processor_t processor); | |
2404 | ||
2405 | static thread_t | |
2406 | sched_clutch_amp_choose_thread(processor_t processor, int priority, ast_t reason); | |
2407 | ||
2408 | static void | |
2409 | sched_clutch_amp_processor_queue_shutdown(processor_t processor); | |
2410 | ||
2411 | static processor_t | |
2412 | sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread); | |
2413 | ||
2414 | static bool | |
2415 | sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread); | |
2416 | ||
2417 | static bool | |
2418 | sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread); | |
2419 | ||
2420 | static void | |
2421 | sched_clutch_migrate_foreign_buckets(processor_t processor, processor_set_t dst_pset, boolean_t drop_lock); | |
2422 | ||
2423 | static void | |
2424 | sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation); | |
2425 | ||
2426 | const struct sched_dispatch_table sched_clutch_amp_dispatch = { | |
2427 | .sched_name = "clutch_amp", | |
2428 | .init = sched_amp_init, | |
2429 | .timebase_init = sched_clutch_timebase_init, | |
2430 | .processor_init = sched_clutch_processor_init, | |
2431 | .pset_init = sched_clutch_pset_init, | |
2432 | .maintenance_continuation = sched_timeshare_maintenance_continue, | |
2433 | .choose_thread = sched_clutch_amp_choose_thread, | |
2434 | .steal_thread_enabled = sched_amp_steal_thread_enabled, | |
2435 | .steal_thread = sched_clutch_amp_steal_thread, | |
2436 | .compute_timeshare_priority = sched_compute_timeshare_priority, | |
2437 | .choose_processor = sched_clutch_amp_choose_processor, | |
2438 | .processor_enqueue = sched_clutch_processor_enqueue, | |
2439 | .processor_queue_shutdown = sched_clutch_amp_processor_queue_shutdown, | |
2440 | .processor_queue_remove = sched_clutch_processor_queue_remove, | |
2441 | .processor_queue_empty = sched_clutch_amp_processor_queue_empty, | |
2442 | .priority_is_urgent = priority_is_urgent, | |
2443 | .processor_csw_check = sched_clutch_amp_processor_csw_check, | |
2444 | .processor_queue_has_priority = sched_clutch_amp_processor_queue_has_priority, | |
2445 | .initial_quantum_size = sched_clutch_initial_quantum_size, | |
2446 | .initial_thread_sched_mode = sched_clutch_initial_thread_sched_mode, | |
2447 | .can_update_priority = can_update_priority, | |
2448 | .update_priority = update_priority, | |
2449 | .lightweight_update_priority = lightweight_update_priority, | |
2450 | .quantum_expire = sched_default_quantum_expire, | |
2451 | .processor_runq_count = sched_clutch_runq_count, | |
2452 | .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum, | |
2453 | .processor_bound_count = sched_clutch_processor_bound_count, | |
2454 | .thread_update_scan = sched_clutch_thread_update_scan, | |
2455 | .multiple_psets_enabled = TRUE, | |
2456 | .sched_groups_enabled = FALSE, | |
2457 | .avoid_processor_enabled = TRUE, | |
2458 | .thread_avoid_processor = sched_clutch_amp_thread_avoid_processor, | |
2459 | .processor_balance = sched_amp_balance, | |
2460 | ||
2461 | .rt_runq = sched_amp_rt_runq, | |
2462 | .rt_init = sched_amp_rt_init, | |
2463 | .rt_queue_shutdown = sched_amp_rt_queue_shutdown, | |
2464 | .rt_runq_scan = sched_amp_rt_runq_scan, | |
2465 | .rt_runq_count_sum = sched_amp_rt_runq_count_sum, | |
2466 | ||
2467 | .qos_max_parallelism = sched_amp_qos_max_parallelism, | |
2468 | .check_spill = sched_amp_check_spill, | |
2469 | .ipi_policy = sched_amp_ipi_policy, | |
2470 | .thread_should_yield = sched_clutch_amp_thread_should_yield, | |
2471 | .run_count_incr = sched_clutch_run_incr, | |
2472 | .run_count_decr = sched_clutch_run_decr, | |
2473 | .update_thread_bucket = sched_clutch_update_thread_bucket, | |
2474 | .pset_made_schedulable = sched_clutch_migrate_foreign_buckets, | |
2475 | .thread_group_recommendation_change = sched_clutch_amp_thread_group_recommendation_change, | |
2476 | }; | |
2477 | ||
2478 | extern processor_set_t ecore_set; | |
2479 | extern processor_set_t pcore_set; | |
2480 | ||
2481 | static thread_t | |
2482 | sched_clutch_amp_choose_thread( | |
2483 | processor_t processor, | |
2484 | int priority, | |
2485 | __unused ast_t reason) | |
2486 | { | |
2487 | processor_set_t pset = processor->processor_set; | |
2488 | bool spill_pending = false; | |
2489 | int spill_pri = -1; | |
2490 | ||
2491 | if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) { | |
2492 | spill_pending = true; | |
2493 | spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root); | |
2494 | } | |
2495 | ||
2496 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
2497 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2498 | boolean_t choose_from_boundq = false; | |
2499 | ||
2500 | if ((bound_runq->highq < priority) && | |
2501 | (clutch_pri < priority) && | |
2502 | (spill_pri < priority)) { | |
2503 | return THREAD_NULL; | |
2504 | } | |
2505 | ||
2506 | if ((spill_pri > bound_runq->highq) && | |
2507 | (spill_pri > clutch_pri)) { | |
2508 | /* | |
2509 | * There is a higher priority thread on the P-core runq, | |
2510 | * so returning THREAD_NULL here will cause thread_select() | |
2511 | * to call sched_clutch_amp_steal_thread() to try to get it. | |
2512 | */ | |
2513 | return THREAD_NULL; | |
2514 | } | |
2515 | ||
2516 | if (bound_runq->highq >= clutch_pri) { | |
2517 | choose_from_boundq = true; | |
2518 | } | |
2519 | ||
2520 | thread_t thread = THREAD_NULL; | |
2521 | if (choose_from_boundq == false) { | |
2522 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
2523 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
2524 | } else { | |
2525 | thread = run_queue_dequeue(bound_runq, SCHED_HEADQ); | |
2526 | } | |
2527 | return thread; | |
2528 | } | |
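/*
 * Example (illustrative): on an E-core CPU with a pending spill, if the
 * P-cluster's root priority (spill_pri) is higher than both the local clutch
 * root priority and the bound runq, THREAD_NULL is returned above so that
 * thread_select() falls back to sched_clutch_amp_steal_thread() and pulls
 * the spilled thread from the P-core hierarchy.
 */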
2529 | ||
2530 | static boolean_t | |
2531 | sched_clutch_amp_processor_queue_empty(processor_t processor) | |
2532 | { | |
2533 | processor_set_t pset = processor->processor_set; | |
2534 | bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id); | |
2535 | ||
2536 | return (sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0) && | |
2537 | (sched_clutch_bound_runq(processor)->count == 0) && | |
2538 | !spill_pending; | |
2539 | } | |
2540 | ||
2541 | static bool | |
2542 | sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread) | |
2543 | { | |
2544 | if (!sched_clutch_amp_processor_queue_empty(processor) || (rt_runq_count(processor->processor_set) > 0)) { | |
2545 | return true; | |
2546 | } | |
2547 | ||
2548 | if ((processor->processor_set->pset_cluster_type == PSET_AMP_E) && (recommended_pset_type(thread) == PSET_AMP_P)) { | |
2549 | return sched_clutch_root_count(&pcore_set->pset_clutch_root) > 0; | |
2550 | } | |
2551 | ||
2552 | return false; | |
2553 | } | |
2554 | ||
2555 | static ast_t | |
2556 | sched_clutch_amp_processor_csw_check(processor_t processor) | |
2557 | { | |
2558 | boolean_t has_higher; | |
2559 | int pri; | |
2560 | ||
2561 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
2562 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2563 | ||
2564 | assert(processor->active_thread != NULL); | |
2565 | ||
2566 | processor_set_t pset = processor->processor_set; | |
2567 | bool spill_pending = false; | |
2568 | int spill_pri = -1; | |
2569 | int spill_urgency = 0; | |
2570 | ||
2571 | if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) { | |
2572 | spill_pending = true; | |
2573 | spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root); | |
2574 | spill_urgency = sched_clutch_root_urgency(&pcore_set->pset_clutch_root); | |
2575 | } | |
2576 | ||
2577 | pri = MAX(clutch_pri, bound_runq->highq); | |
2578 | if (spill_pending) { | |
2579 | pri = MAX(pri, spill_pri); | |
2580 | } | |
2581 | ||
2582 | if (processor->first_timeslice) { | |
2583 | has_higher = (pri > processor->current_pri); | |
2584 | } else { | |
2585 | has_higher = (pri >= processor->current_pri); | |
2586 | } | |
2587 | ||
2588 | if (has_higher) { | |
2589 | if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) { | |
2590 | return AST_PREEMPT | AST_URGENT; | |
2591 | } | |
2592 | ||
2593 | if (bound_runq->urgency > 0) { | |
2594 | return AST_PREEMPT | AST_URGENT; | |
2595 | } | |
2596 | ||
2597 | if (spill_urgency > 0) { | |
2598 | return AST_PREEMPT | AST_URGENT; | |
2599 | } | |
2600 | ||
2601 | return AST_PREEMPT; | |
2602 | } | |
2603 | ||
2604 | return AST_NONE; | |
2605 | } | |
2606 | ||
2607 | static boolean_t | |
2608 | sched_clutch_amp_processor_queue_has_priority(processor_t processor, | |
2609 | int priority, | |
2610 | boolean_t gte) | |
2611 | { | |
2612 | bool spill_pending = false; | |
2613 | int spill_pri = -1; | |
2614 | processor_set_t pset = processor->processor_set; | |
2615 | ||
2616 | if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) { | |
2617 | spill_pending = true; | |
2618 | spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root); | |
2619 | } | |
2620 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2621 | ||
2622 | int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq); | |
2623 | if (spill_pending) { | |
2624 | qpri = MAX(qpri, spill_pri); | |
2625 | } | |
2626 | ||
2627 | if (gte) { | |
2628 | return qpri >= priority; | |
2629 | } else { | |
2630 | return qpri > priority; | |
2631 | } | |
2632 | } | |
2633 | ||
2634 | /* | |
2635 | * sched_clutch_hierarchy_thread_pset() | |
2636 | * | |
2637 | * Routine to determine where a thread should be enqueued. If this is the | |
2638 | * first runnable thread in the clutch_bucket, the thread's recommendation | |
2639 | * decides; otherwise the clutch bucket's current hierarchy membership does. | |
2640 | */ | |
2641 | static processor_set_t | |
2642 | sched_clutch_hierarchy_thread_pset(thread_t thread) | |
2643 | { | |
2644 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread) == false) { | |
2645 | return (recommended_pset_type(thread) == PSET_AMP_P) ? pcore_set : ecore_set; | |
2646 | } | |
2647 | ||
2648 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
2649 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
2650 | sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed); | |
2651 | if (scb_root) { | |
2652 | /* Clutch bucket is already runnable; return the pset hierarchy it is part of */ | |
2653 | return scb_root->scr_pset; | |
2654 | } | |
2655 | return (recommended_pset_type(thread) == PSET_AMP_E) ? ecore_set : pcore_set; | |
2656 | } | |
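
/*
 * Illustrative sketch only (hypothetical names, not used by the scheduler):
 * the placement rule above is deliberately "sticky" -- once a clutch bucket
 * is enqueued in a hierarchy, later threads of that bucket follow it there,
 * and only the first runnable thread consults the recommendation. The
 * simplified helper below restates that rule, ignoring the eligibility and
 * pset-availability checks.
 */
typedef enum { EXAMPLE_CLUSTER_E, EXAMPLE_CLUSTER_P } example_cluster_t;

static example_cluster_t
example_choose_hierarchy(bool bucket_already_runnable,
    example_cluster_t bucket_current_cluster,
    example_cluster_t thread_recommendation)
{
        if (bucket_already_runnable) {
                /* Follow the hierarchy the clutch bucket already lives in */
                return bucket_current_cluster;
        }
        /* First runnable thread in the bucket: the recommendation decides */
        return thread_recommendation;
}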
2657 | ||
2658 | /* | |
2659 | * sched_clutch_thread_pset_recommended() | |
2660 | * | |
2661 | * Routine to determine if the thread should be placed on the provided pset. | |
2662 | * The routine first makes sure the cluster is available for scheduling. If | |
2663 | * it is available, it looks at the thread's recommendation. Called | |
2664 | * with the pset lock held. | |
2665 | */ | |
2666 | static bool | |
2667 | sched_clutch_thread_pset_recommended(thread_t thread, processor_set_t pset) | |
2668 | { | |
2669 | if (!sched_clutch_pset_available(pset)) { | |
2670 | return false; | |
2671 | } | |
2672 | ||
2673 | /* At this point, all clusters should be available and recommended */ | |
2674 | if (sched_clutch_hierarchy_thread_pset(thread) != pset) { | |
2675 | return false; | |
2676 | } | |
2677 | ||
2678 | return true; | |
2679 | } | |
2680 | ||
2681 | ||
2682 | static void | |
2683 | sched_clutch_amp_processor_queue_shutdown(processor_t processor) | |
2684 | { | |
2685 | processor_set_t pset = processor->processor_set; | |
2686 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
2687 | thread_t thread; | |
2688 | queue_head_t tqueue; | |
2689 | ||
2690 | /* We only need to migrate threads if this is the last active or last recommended processor in the pset */ | |
2691 | if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) { | |
2692 | pset_unlock(pset); | |
2693 | return; | |
2694 | } | |
2695 | ||
2696 | queue_init(&tqueue); | |
2697 | while (sched_clutch_root_count(pset_clutch_root) > 0) { | |
2698 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
2699 | enqueue_tail(&tqueue, &thread->runq_links); | |
2700 | } | |
2701 | pset_unlock(pset); | |
2702 | ||
2703 | qe_foreach_element_safe(thread, &tqueue, runq_links) { | |
2704 | remqueue(&thread->runq_links); | |
2705 | thread_lock(thread); | |
2706 | thread_setrun(thread, SCHED_TAILQ); | |
2707 | thread_unlock(thread); | |
2708 | } | |
2709 | } | |
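
/*
 * Illustrative sketch only (hypothetical names; assumes <pthread.h> in a
 * user-space build): the shutdown path above uses a drain-then-requeue shape
 * that recurs in the migration routines below -- pull everything off the
 * shared structure while its lock is held, then drop the lock before
 * re-dispatching, since thread_setrun() may need to take other pset/thread
 * locks of its own.
 */
struct example_item {
        struct example_item *next;
};

struct example_shared_queue {
        pthread_mutex_t lock;
        struct example_item *head;
};

static void
example_drain_and_redispatch(struct example_shared_queue *q,
    void (*redispatch)(struct example_item *))
{
        struct example_item *local;

        /* Drain into a local list while holding the lock */
        pthread_mutex_lock(&q->lock);
        local = q->head;
        q->head = NULL;
        pthread_mutex_unlock(&q->lock);

        /* Re-dispatch outside the lock */
        while (local != NULL) {
                struct example_item *item = local;
                local = item->next;
                redispatch(item);
        }
}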
2710 | ||
2711 | static thread_t | |
2712 | sched_clutch_amp_steal_thread(processor_set_t pset) | |
2713 | { | |
2714 | thread_t thread = THREAD_NULL; | |
2715 | processor_set_t nset = pset; | |
2716 | ||
2717 | if (pcore_set->online_processor_count == 0) { | |
2718 | /* Nothing to steal from */ | |
2719 | goto out; | |
2720 | } | |
2721 | ||
2722 | if (pset->pset_cluster_type == PSET_AMP_P) { | |
2723 | /* P cores don't steal from E cores */ | |
2724 | goto out; | |
2725 | } | |
2726 | ||
2727 | processor_t processor = current_processor(); | |
2728 | assert(pset == processor->processor_set); | |
2729 | ||
2730 | bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id); | |
2731 | bit_clear(pset->pending_spill_cpu_mask, processor->cpu_id); | |
2732 | ||
2733 | nset = pcore_set; | |
2734 | ||
2735 | assert(nset != pset); | |
2736 | ||
2737 | if (sched_get_pset_load_average(nset) >= sched_amp_steal_threshold(nset, spill_pending)) { | |
2738 | pset_unlock(pset); | |
2739 | ||
2740 | pset = nset; | |
2741 | ||
2742 | pset_lock(pset); | |
2743 | ||
2744 | /* Allow the steal if the load average is still above the threshold, there are no recommended idle cores, and the runq holds more threads than there are cores DISPATCHING */ | |
2745 | if ((sched_get_pset_load_average(pset) >= sched_amp_steal_threshold(pset, spill_pending)) && | |
2746 | ((int)sched_clutch_root_count(&pset->pset_clutch_root) > bit_count(pset->cpu_state_map[PROCESSOR_DISPATCHING])) && | |
2747 | (bit_count(pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) == 0)) { | |
2748 | thread = sched_clutch_thread_highest(&pset->pset_clutch_root); | |
2749 | KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_STEAL) | DBG_FUNC_NONE, spill_pending, 0, 0, 0); | |
2750 | sched_update_pset_load_average(pset); | |
2751 | } | |
2752 | } | |
2753 | ||
2754 | out: | |
2755 | pset_unlock(pset); | |
2756 | return thread; | |
2757 | } | |
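
/*
 * Illustrative sketch only (hypothetical names): after re-taking the P-cluster
 * lock, the steal above is admitted only if the cluster still looks
 * overloaded -- load average at or above the steal threshold, more runnable
 * threads in the clutch root than CPUs already dispatching, and no
 * recommended idle CPUs left. Restated over plain integers:
 */
static bool
example_steal_allowed(int load_avg, int steal_threshold,
    unsigned int root_count, int dispatching_cpus, int recommended_idle_cpus)
{
        return (load_avg >= steal_threshold) &&
               ((int)root_count > dispatching_cpus) &&
               (recommended_idle_cpus == 0);
}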
2758 | ||
2759 | /* Return true if this thread should not continue running on this processor */ | |
2760 | static bool | |
2761 | sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread) | |
2762 | { | |
2763 | if (processor->processor_set->pset_cluster_type == PSET_AMP_E) { | |
2764 | if (sched_clutch_thread_pset_recommended(thread, pcore_set)) { | |
2765 | return true; | |
2766 | } | |
2767 | } else if (processor->processor_set->pset_cluster_type == PSET_AMP_P) { | |
2768 | if (!sched_clutch_thread_pset_recommended(thread, pcore_set)) { | |
2769 | return true; | |
2770 | } | |
2771 | } | |
2772 | ||
2773 | return false; | |
2774 | } | |
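
/*
 * Illustrative sketch only (hypothetical names): with two clusters, the
 * avoid-processor policy above collapses to "avoid whenever the current
 * cluster and the recommended cluster disagree", ignoring the
 * pset-availability check done by sched_clutch_thread_pset_recommended().
 */
static bool
example_should_avoid(bool running_on_pcore, bool recommended_for_pcore)
{
        return running_on_pcore != recommended_for_pcore;
}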
2775 | ||
2776 | static processor_t | |
2777 | sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread) | |
2778 | { | |
2779 | /* Bound threads don't call this function */ | |
2780 | assert(thread->bound_processor == PROCESSOR_NULL); | |
2781 | ||
2782 | processor_set_t nset; | |
2783 | processor_t chosen_processor = PROCESSOR_NULL; | |
2784 | ||
2785 | select_pset: | |
2786 | nset = (pset == ecore_set) ? pcore_set : ecore_set; | |
2787 | if (!sched_clutch_pset_available(pset)) { | |
2788 | /* If the current pset is not available for scheduling, just use the other pset */ | |
2789 | pset_unlock(pset); | |
2790 | pset_lock(nset); | |
2791 | goto select_processor; | |
2792 | } | |
2793 | ||
2794 | /* Check if the thread is recommended to run on this pset */ | |
2795 | if (sched_clutch_thread_pset_recommended(thread, pset)) { | |
2796 | nset = pset; | |
2797 | goto select_processor; | |
2798 | } else { | |
2799 | /* pset not recommended; try the other pset */ | |
2800 | pset_unlock(pset); | |
2801 | pset_lock(nset); | |
2802 | pset = nset; | |
2803 | goto select_pset; | |
2804 | } | |
2805 | ||
2806 | select_processor: | |
2807 | if (!sched_clutch_pset_available(nset)) { | |
2808 | /* | |
2809 | * It looks like neither pset is available for scheduling for | |
2810 | * some reason. In that case, just fall back to the master | |
2811 | * processor's pset for scheduling. | |
2812 | */ | |
2813 | if (master_processor->processor_set != nset) { | |
2814 | pset_unlock(nset); | |
2815 | nset = master_processor->processor_set; | |
2816 | pset_lock(nset); | |
2817 | } | |
2818 | } | |
2819 | chosen_processor = choose_processor(nset, processor, thread); | |
2820 | assert(chosen_processor->processor_set == nset); | |
2821 | return chosen_processor; | |
2822 | } | |
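
/*
 * Illustrative sketch only (hypothetical names; assumes <pthread.h> in a
 * user-space build): the selection above never holds two pset locks at once.
 * Whenever it moves to the other cluster it drops the lock it holds before
 * taking the other one, and the caller re-validates state once the new lock
 * is held, as the select_pset/select_processor labels above do.
 */
struct example_cluster {
        pthread_mutex_t lock;
        bool available;
};

static struct example_cluster *
example_lock_handoff(struct example_cluster *held, struct example_cluster *other)
{
        /* Caller holds held->lock; returns with the chosen cluster's lock held */
        if (held->available) {
                return held;
        }
        pthread_mutex_unlock(&held->lock);
        pthread_mutex_lock(&other->lock);
        return other;
}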
2823 | ||
2824 | /* | |
2825 | * AMP Clutch Scheduler Thread Migration | |
2826 | * | |
2827 | * For the AMP version of the clutch scheduler, a thread is always scheduled via its | |
2828 | * thread group, so it is important that the thread group is part of the correct | |
2829 | * processor set hierarchy. To ensure that, the clutch scheduler moves all eligible | |
2830 | * clutch buckets to the correct hierarchy whenever CLPC changes a thread group's | |
2831 | * recommendation. | |
2832 | */ | |
2833 | ||
2834 | /* | |
2835 | * sched_clutch_recommended_pset() | |
2836 | * | |
2837 | * Routine to decide which hierarchy the thread group should be in based on the | |
2838 | * recommendation and other thread group and system properties. This routine is | |
2839 | * used to determine if thread group migration is necessary and should mimic the | |
2840 | * logic in sched_clutch_thread_pset_recommended() & recommended_pset_type(). | |
2841 | */ | |
2842 | static processor_set_t | |
2843 | sched_clutch_recommended_pset(sched_clutch_t sched_clutch, cluster_type_t recommendation) | |
2844 | { | |
2845 | if (!sched_clutch_pset_available(pcore_set)) { | |
2846 | return ecore_set; | |
2847 | } | |
2848 | ||
2849 | if (!sched_clutch_pset_available(ecore_set)) { | |
2850 | return pcore_set; | |
2851 | } | |
2852 | ||
2853 | /* | |
2854 | * If all clusters are available and recommended, use the recommendation | |
2855 | * to decide which cluster to use. | |
2856 | */ | |
2857 | pset_cluster_type_t type = thread_group_pset_recommendation(sched_clutch->sc_tg, recommendation); | |
2858 | return (type == PSET_AMP_E) ? ecore_set : pcore_set; | |
2859 | } | |
2860 | ||
2861 | static void | |
2862 | sched_clutch_bucket_threads_drain(sched_clutch_bucket_t clutch_bucket, sched_clutch_root_t root_clutch, queue_t clutch_threads) | |
2863 | { | |
2864 | uint16_t thread_count = clutch_bucket->scb_thr_count; | |
2865 | thread_t thread; | |
2866 | uint64_t current_timestamp = mach_approximate_time(); | |
2867 | while (thread_count > 0) { | |
2868 | thread = run_queue_peek(&clutch_bucket->scb_runq); | |
ea3f0419 | 2869 | sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_NONE); |
c6bf4f31 A |
2870 | enqueue_tail(clutch_threads, &thread->runq_links); |
2871 | thread_count--; | |
2872 | } | |
2873 | ||
2874 | /* | |
2875 | * This operation should have drained the clutch bucket and pulled it out of the | |
2876 | * hierarchy. | |
2877 | */ | |
2878 | assert(clutch_bucket->scb_thr_count == 0); | |
2879 | assert(clutch_bucket->scb_root == NULL); | |
2880 | } | |
2881 | ||
2882 | /* | |
2883 | * sched_clutch_migrate_thread_group() | |
2884 | * | |
2885 | * Routine to implement the migration of threads when the thread group | |
2886 | * recommendation is updated. The migration works using a 2-phase | |
2887 | * algorithm. | |
2888 | * | |
2889 | * Phase 1: With the source pset locked (the pset opposite the destination | |
2890 | * chosen by sched_clutch_recommended_pset()), drain all the runnable threads | |
2891 | * into a local queue and update the TG recommendation. | |
2892 | * | |
2893 | * Phase 2: Call thread_setrun() on all the drained threads. Since the TG recommendation | |
2894 | * has been updated, these should all end up in the right hierarchy. | |
2895 | */ | |
2896 | static void | |
2897 | sched_clutch_migrate_thread_group(sched_clutch_t sched_clutch, cluster_type_t new_recommendation) | |
2898 | { | |
2899 | thread_t thread; | |
2900 | ||
2901 | /* If the thread group is empty, just update the recommendation */ | |
2902 | if (os_atomic_load(&sched_clutch->sc_thr_count, relaxed) == 0) { | |
2903 | thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation); | |
2904 | return; | |
2905 | } | |
2906 | ||
2907 | processor_set_t dst_pset = sched_clutch_recommended_pset(sched_clutch, new_recommendation); | |
2908 | processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set; | |
2909 | ||
2910 | queue_head_t clutch_threads; | |
2911 | queue_init(&clutch_threads); | |
2912 | ||
2913 | /* Interrupts need to be disabled to make sure threads won't become runnable during the | |
2914 | * migration and attempt to grab the pset/thread locks. | |
2915 | */ | |
2916 | spl_t s = splsched(); | |
2917 | ||
2918 | pset_lock(src_pset); | |
2919 | for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SCHED_MAX; bucket++) { | |
2920 | sched_clutch_bucket_t clutch_bucket = &(sched_clutch->sc_clutch_buckets[bucket]); | |
2921 | sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed); | |
2922 | if ((scb_root == NULL) || (scb_root->scr_pset == dst_pset)) { | |
2923 | /* Clutch bucket not runnable or already in the right hierarchy; nothing to do here */ | |
2924 | continue; | |
2925 | } | |
2926 | assert(scb_root->scr_pset == src_pset); | |
2927 | /* Now remove all the threads from the runq so that thread->runq is set correctly */ | |
2928 | sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads); | |
2929 | } | |
2930 | ||
2931 | /* | |
2932 | * Now that all the clutch buckets have been drained, update the TG recommendation. | |
2933 | * This operation needs to be done with the pset lock held to make sure that anyone | |
2934 | * coming in before the migration started would get the original pset as the root | |
2935 | * of this sched_clutch and attempt to hold the src_pset lock. Once the TG changes, | |
2936 | * all threads that are becoming runnable would find the clutch bucket empty and | |
2937 | * the TG recommendation would coax them to enqueue it in the new recommended | |
2938 | * hierarchy. This effectively synchronizes with other threads calling | |
2939 | * thread_setrun() and trying to decide which pset the thread/clutch_bucket | |
2940 | * belongs in. | |
2941 | */ | |
2942 | thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation); | |
2943 | pset_unlock(src_pset); | |
2944 | ||
2945 | /* Now setrun all the threads in the local queue */ | |
2946 | qe_foreach_element_safe(thread, &clutch_threads, runq_links) { | |
2947 | remqueue(&thread->runq_links); | |
2948 | thread_lock(thread); | |
2949 | thread_setrun(thread, SCHED_TAILQ); | |
2950 | thread_unlock(thread); | |
2951 | } | |
2952 | ||
2953 | splx(s); | |
2954 | } | |
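
/*
 * Illustrative sketch only (hypothetical names; assumes <pthread.h> and
 * <stdatomic.h> in a user-space build). The essential synchronization point
 * of the two-phase migration above is that the routing state (the TG
 * recommendation) is flipped while the source lock is still held: a
 * concurrent enqueuer either acquires the source lock behind the drain and
 * then re-evaluates the routing, or it already observes the new routing and
 * goes straight to the destination; it can never add to the source after the
 * drain has completed.
 */
struct example_node {
        struct example_node *next;
};

struct example_side {
        pthread_mutex_t lock;
        struct example_node *head;
};

/* Routing state consulted by enqueuers; flipped only with the source lock held */
static _Atomic int example_route_to_dst;

static void
example_enqueue(struct example_side *src, struct example_side *dst,
    struct example_node *n)
{
        for (;;) {
                struct example_side *side = atomic_load(&example_route_to_dst) ? dst : src;
                pthread_mutex_lock(&side->lock);
                /*
                 * Re-check the routing under the lock, mirroring threads that
                 * re-evaluate the TG recommendation after taking the pset lock.
                 */
                struct example_side *recheck = atomic_load(&example_route_to_dst) ? dst : src;
                if (recheck == side) {
                        n->next = side->head;
                        side->head = n;
                        pthread_mutex_unlock(&side->lock);
                        return;
                }
                pthread_mutex_unlock(&side->lock);
        }
}

static void
example_migrate(struct example_side *src, struct example_side *dst)
{
        struct example_node *drained;

        /* Phase 1: drain the source and flip the routing under the source lock */
        pthread_mutex_lock(&src->lock);
        drained = src->head;
        src->head = NULL;
        atomic_store(&example_route_to_dst, 1);
        pthread_mutex_unlock(&src->lock);

        /* Phase 2: re-enqueue the drained nodes; they now route to the destination */
        while (drained != NULL) {
                struct example_node *node = drained;
                drained = node->next;
                example_enqueue(src, dst, node);
        }
}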
2955 | ||
2956 | static void | |
2957 | sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation) | |
2958 | { | |
2959 | /* | |
2960 | * For the clutch scheduler, the change in recommendation moves the thread group | |
2961 | * to the right hierarchy. sched_clutch_migrate_thread_group() is also responsible | |
2962 | * for updating the recommendation of the thread group. | |
2963 | */ | |
2964 | sched_clutch_migrate_thread_group(&tg->tg_sched_clutch, new_recommendation); | |
2965 | ||
2966 | if (new_recommendation != CLUSTER_TYPE_P) { | |
2967 | return; | |
2968 | } | |
2969 | ||
2970 | sched_amp_bounce_thread_group_from_ecores(ecore_set, tg); | |
2971 | } | |
2972 | ||
2973 | /* | |
2974 | * sched_clutch_migrate_foreign_buckets() | |
2975 | * | |
2976 | * Routine to migrate all the clutch buckets which are not in their recommended | |
2977 | * pset hierarchy now that a new pset has become runnable. The algorithm is | |
2978 | * similar to sched_clutch_migrate_thread_group(). | |
2979 | * | |
2980 | * Invoked with the newly recommended pset lock held and interrupts disabled. | |
2981 | */ | |
2982 | static void | |
2983 | sched_clutch_migrate_foreign_buckets(__unused processor_t processor, processor_set_t dst_pset, boolean_t drop_lock) | |
2984 | { | |
2985 | thread_t thread; | |
2986 | processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set; | |
2987 | ||
2988 | if (!sched_clutch_pset_available(dst_pset)) { | |
2989 | /* | |
2990 | * It is possible that some state about the pset changed, | |
2991 | * but it is still not available for scheduling. Nothing to | |
2992 | * do here in that case. | |
2993 | */ | |
2994 | if (drop_lock) { | |
2995 | pset_unlock(dst_pset); | |
2996 | } | |
2997 | return; | |
2998 | } | |
2999 | pset_unlock(dst_pset); | |
3000 | ||
3001 | queue_head_t clutch_threads; | |
3002 | queue_init(&clutch_threads); | |
3003 | sched_clutch_root_t src_root = &src_pset->pset_clutch_root; | |
3004 | ||
3005 | pset_lock(src_pset); | |
3006 | queue_t clutch_bucket_list = &src_pset->pset_clutch_root.scr_foreign_buckets; | |
3007 | ||
3008 | if (sched_clutch_root_count(src_root) == 0) { | |
3009 | /* No threads present in this hierarchy */ | |
3010 | pset_unlock(src_pset); | |
3011 | goto migration_complete; | |
3012 | } | |
3013 | ||
3014 | sched_clutch_bucket_t clutch_bucket; | |
3015 | qe_foreach_element_safe(clutch_bucket, clutch_bucket_list, scb_foreignlink) { | |
3016 | sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed); | |
3017 | assert(scb_root->scr_pset == src_pset); | |
3018 | /* Now remove all the threads from the runq so that thread->runq is set correctly */ | |
3019 | sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads); | |
3020 | assert(clutch_bucket->scb_foreign == false); | |
3021 | } | |
3022 | pset_unlock(src_pset); | |
3023 | ||
3024 | /* Now setrun all the threads in the local queue */ | |
3025 | qe_foreach_element_safe(thread, &clutch_threads, runq_links) { | |
3026 | remqueue(&thread->runq_links); | |
3027 | thread_lock(thread); | |
3028 | thread_setrun(thread, SCHED_TAILQ); | |
3029 | thread_unlock(thread); | |
3030 | } | |
3031 | ||
3032 | migration_complete: | |
3033 | if (!drop_lock) { | |
3034 | pset_lock(dst_pset); | |
3035 | } | |
3036 | } | |
3037 | ||
3038 | #endif /* __AMP__ */ | |
cb323159 A |
3039 | |
3040 | #endif /* CONFIG_SCHED_CLUTCH */ |