/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_clutch.h>
#include <machine/atomic.h>
#include <kern/sched_clutch.h>
#include <sys/kdebug.h>

#if __AMP__
#include <kern/sched_amp_common.h>
#endif /* __AMP__ */

#if CONFIG_SCHED_CLUTCH

/* Forward declarations of static routines */

/* Root level hierarchy management */
static void sched_clutch_root_init(sched_clutch_root_t, processor_set_t);
static void sched_clutch_root_bucket_init(sched_clutch_root_bucket_t, sched_bucket_t);
static void sched_clutch_root_pri_update(sched_clutch_root_t);
static sched_clutch_root_bucket_t sched_clutch_root_highest_root_bucket(sched_clutch_root_t, uint64_t);
static void sched_clutch_root_urgency_inc(sched_clutch_root_t, thread_t);
static void sched_clutch_root_urgency_dec(sched_clutch_root_t, thread_t);

/* Root bucket level hierarchy management */
static uint64_t sched_clutch_root_bucket_deadline_calculate(sched_clutch_root_bucket_t, uint64_t);
static void sched_clutch_root_bucket_deadline_update(sched_clutch_root_bucket_t, sched_clutch_root_t, uint64_t);
static int sched_clutch_root_bucket_pri_compare(sched_clutch_root_bucket_t, sched_clutch_root_bucket_t);

/* Clutch bucket level hierarchy management */
static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);

static void sched_clutch_bucket_cpu_usage_update(sched_clutch_bucket_t, uint64_t);
static void sched_clutch_bucket_cpu_blocked_update(sched_clutch_bucket_t, uint64_t);
static uint8_t sched_clutch_bucket_pri_calculate(sched_clutch_bucket_t, uint64_t);
static sched_clutch_bucket_t sched_clutch_root_bucket_highest_clutch_bucket(sched_clutch_root_bucket_t);

/* Clutch timeshare properties updates */
static uint32_t sched_clutch_run_bucket_incr(sched_clutch_t, sched_bucket_t);
static uint32_t sched_clutch_run_bucket_decr(sched_clutch_t, sched_bucket_t);
static void sched_clutch_bucket_cpu_adjust(sched_clutch_bucket_t);
static void sched_clutch_bucket_timeshare_update(sched_clutch_bucket_t);
static boolean_t sched_thread_sched_pri_promoted(thread_t);

/* Clutch membership management */
static boolean_t sched_clutch_thread_insert(sched_clutch_root_t, thread_t, integer_t);
static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t);
static thread_t sched_clutch_thread_highest(sched_clutch_root_t);

/* Clutch properties updates */
static uint32_t sched_clutch_root_urgency(sched_clutch_root_t);
static uint32_t sched_clutch_root_count_sum(sched_clutch_root_t);
static int sched_clutch_root_priority(sched_clutch_root_t);

#if __AMP__
/* System based routines */
static bool sched_clutch_pset_available(processor_set_t);
#endif /* __AMP__ */

/* Helper debugging routines */
static inline void sched_clutch_hierarchy_locked_assert(sched_clutch_root_t);

/*
 * Global priority queue comparator routine for root buckets. The
 * routine implements the priority queue as a minimum deadline queue
 * to achieve EDF scheduling.
 */
priority_queue_compare_fn_t sched_clutch_root_bucket_compare;

/*
 * Special markers for buckets that have invalid WCELs/quantums etc.
 */
#define SCHED_CLUTCH_INVALID_TIME_32 ((uint32_t)~0)
#define SCHED_CLUTCH_INVALID_TIME_64 ((uint64_t)~0)

/*
 * Root level bucket WCELs
 *
 * The root level bucket selection algorithm is an Earliest Deadline
 * First (EDF) algorithm where the deadline for a bucket is defined by
 * its worst-case-execution-latency (WCEL) and its make-runnable
 * timestamp.
 */
static uint32_t sched_clutch_root_bucket_wcel_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,   /* FIXPRI */
	0,                              /* FG */
	37500,                          /* IN (37.5ms) */
	75000,                          /* DF (75ms) */
	150000,                         /* UT (150ms) */
	250000                          /* BG (250ms) */
};
static uint64_t sched_clutch_root_bucket_wcel[TH_BUCKET_SCHED_MAX] = {0};
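
/*
 * Illustrative example (editorial, derived from the table above): if an IN
 * root bucket and a UT root bucket both become runnable at time T, their EDF
 * deadlines are roughly T + 37.5ms and T + 150ms respectively, so the IN
 * bucket is selected first. The FIXPRI (AboveUI) bucket never participates in
 * the EDF ordering; its WCEL is marked invalid.
 */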

/*
 * Root level bucket warp
 *
 * Each root level bucket has a warp value associated with it as well.
 * The warp value allows the root bucket to effectively warp ahead of
 * lower priority buckets for a limited time even if it has a later
 * deadline. The warping behavior provides extra (but limited)
 * opportunity for high priority buckets to remain responsive.
 */

/* Special warp deadline value to indicate that the bucket has not used any warp yet */
#define SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED    (SCHED_CLUTCH_INVALID_TIME_64)

/* Warp window durations for various tiers */
static uint32_t sched_clutch_root_bucket_warp_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,   /* FIXPRI */
	8000,                           /* FG (8ms) */
	4000,                           /* IN (4ms) */
	2000,                           /* DF (2ms) */
	1000,                           /* UT (1ms) */
	0                               /* BG (0ms) */
};
static uint64_t sched_clutch_root_bucket_warp[TH_BUCKET_SCHED_MAX] = {0};
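
/*
 * Illustrative example (editorial): if the FG bucket is runnable while a
 * lower bucket currently holds the earliest deadline, FG can still be
 * selected by consuming its 8ms warp window; once that window expires,
 * selection falls back to the pure EDF order until FG's warp is replenished,
 * which happens the next time FG is chosen as the earliest-deadline bucket.
 */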

/*
 * Thread level quantum
 *
 * The algorithm defines quantums for threads at various buckets. This
 * (combined with the root level bucket quantums) restricts how much
 * the lower priority levels can preempt the higher priority threads.
 */
static uint32_t sched_clutch_thread_quantum_us[TH_BUCKET_SCHED_MAX] = {
	10000,                          /* FIXPRI (10ms) */
	10000,                          /* FG (10ms) */
	8000,                           /* IN (8ms) */
	6000,                           /* DF (6ms) */
	4000,                           /* UT (4ms) */
	2000                            /* BG (2ms) */
};
static uint64_t sched_clutch_thread_quantum[TH_BUCKET_SCHED_MAX] = {0};
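
/*
 * Editorial note: the smaller quantum at the lower buckets bounds how long a
 * low priority thread that won the CPU (via EDF or warp expiry) can run
 * before the hierarchy is re-evaluated; e.g. a BG thread runs for at most
 * 2ms per selection while an FG thread gets 10ms.
 */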

enum sched_clutch_state {
	SCHED_CLUTCH_STATE_EMPTY = 0,
	SCHED_CLUTCH_STATE_RUNNABLE,
};

/*
 * sched_clutch_us_to_abstime()
 *
 * Initializer for converting all durations in usec to abstime
 */
static void
sched_clutch_us_to_abstime(uint32_t *us_vals, uint64_t *abstime_vals)
{
	for (int i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		if (us_vals[i] == SCHED_CLUTCH_INVALID_TIME_32) {
			abstime_vals[i] = SCHED_CLUTCH_INVALID_TIME_64;
		} else {
			clock_interval_to_absolutetime_interval(us_vals[i],
			    NSEC_PER_USEC, &abstime_vals[i]);
		}
	}
}
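
/*
 * Expected usage (editorial sketch; the scheduler init path is outside this
 * excerpt): each tunable table above would be converted once at init, e.g.
 *
 *   sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel);
 *   sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp);
 *   sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum);
 */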

#if DEVELOPMENT || DEBUG

/*
 * sched_clutch_hierarchy_locked_assert()
 *
 * Debugging helper routine. Asserts that the hierarchy is locked. The locking
 * for the hierarchy depends on where the hierarchy is hooked. The current
 * implementation hooks the hierarchy at the pset, so the hierarchy is locked
 * using the pset lock.
 */
static inline void
sched_clutch_hierarchy_locked_assert(
	sched_clutch_root_t root_clutch)
{
	pset_assert_locked(root_clutch->scr_pset);
}

#else /* DEVELOPMENT || DEBUG */

static inline void
sched_clutch_hierarchy_locked_assert(
	__unused sched_clutch_root_t root_clutch)
{
}

#endif /* DEVELOPMENT || DEBUG */

/*
 * sched_clutch_thr_count_inc()
 *
 * Increment thread count at a hierarchy level with overflow checks.
 */
static void
sched_clutch_thr_count_inc(
	uint16_t *thr_count)
{
	if (__improbable(os_inc_overflow(thr_count))) {
		panic("sched_clutch thread count overflowed!");
	}
}

/*
 * sched_clutch_thr_count_dec()
 *
 * Decrement thread count at a hierarchy level with underflow checks.
 */
static void
sched_clutch_thr_count_dec(
	uint16_t *thr_count)
{
	if (__improbable(os_dec_overflow(thr_count))) {
		panic("sched_clutch thread count underflowed!");
	}
}

#if __AMP__

/*
 * sched_clutch_pset_available()
 *
 * Routine to determine if a pset is available for scheduling.
 */
static bool
sched_clutch_pset_available(processor_set_t pset)
{
	/* Check if cluster has none of the CPUs available */
	if (pset->online_processor_count == 0) {
		return false;
	}

	/* Check if the cluster is not recommended by CLPC */
	if (!pset_is_recommended(pset)) {
		return false;
	}

	return true;
}

#endif /* __AMP__ */

/*
 * sched_clutch_root_init()
 *
 * Routine to initialize the scheduler hierarchy root.
 */
static void
sched_clutch_root_init(
	sched_clutch_root_t root_clutch,
	processor_set_t pset)
{
	root_clutch->scr_thr_count = 0;
	root_clutch->scr_priority = NOPRI;
	root_clutch->scr_urgency = 0;
	root_clutch->scr_pset = pset;

	/* Initialize the queue which maintains all runnable clutch_buckets for timesharing purposes */
	queue_init(&root_clutch->scr_clutch_buckets);

	/* Initialize the queue which maintains all runnable foreign clutch buckets */
	queue_init(&root_clutch->scr_foreign_buckets);

	/* Initialize the bitmap and priority queue of runnable root buckets */
	sched_clutch_root_bucket_compare = priority_heap_make_comparator(a, b, struct sched_clutch_root_bucket, scrb_pqlink, {
		return (a->scrb_deadline < b->scrb_deadline) ? 1 : ((a->scrb_deadline == b->scrb_deadline) ? 0 : -1);
	});
	priority_queue_init(&root_clutch->scr_root_buckets, PRIORITY_QUEUE_GENERIC_KEY | PRIORITY_QUEUE_MIN_HEAP);
	bitmap_zero(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX);
	bitmap_zero(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);

	/* Initialize all the root buckets */
	for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		sched_clutch_root_bucket_init(&root_clutch->scr_buckets[i], i);
	}
}

/*
 * sched_clutch_root_bucket_init()
 *
 * Routine to initialize root buckets.
 */
static void
sched_clutch_root_bucket_init(
	sched_clutch_root_bucket_t root_bucket,
	sched_bucket_t bucket)
{
	root_bucket->scrb_bucket = bucket;
	priority_queue_init(&root_bucket->scrb_clutch_buckets, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
	priority_queue_entry_init(&root_bucket->scrb_pqlink);
	root_bucket->scrb_deadline = SCHED_CLUTCH_INVALID_TIME_64;
	root_bucket->scrb_warped_deadline = 0;
	root_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[root_bucket->scrb_bucket];
}

/*
 * sched_clutch_root_bucket_pri_compare()
 *
 * Routine to compare root buckets based on the highest runnable clutch
 * bucket priorities in the root buckets.
 */
static int
sched_clutch_root_bucket_pri_compare(
	sched_clutch_root_bucket_t a,
	sched_clutch_root_bucket_t b)
{
	sched_clutch_bucket_t a_highest = sched_clutch_root_bucket_highest_clutch_bucket(a);
	sched_clutch_bucket_t b_highest = sched_clutch_root_bucket_highest_clutch_bucket(b);
	return (a_highest->scb_priority > b_highest->scb_priority) ?
	       1 : ((a_highest->scb_priority == b_highest->scb_priority) ? 0 : -1);
}

/*
 * sched_clutch_root_select_aboveui()
 *
 * Special case scheduling for the Above UI bucket.
 *
 * AboveUI threads are typically system critical threads that need low latency,
 * which is why they are handled specially.
 *
 * Since the priority ranges for the AboveUI and FG Timeshare buckets overlap, it is
 * important to maintain some native priority order between those buckets. The policy
 * implemented here is to compare the highest clutch buckets of both buckets; if the
 * Above UI bucket is higher, schedule it immediately. Otherwise fall through to the
 * deadline based scheduling which should pick up the timeshare buckets.
 *
 * The implementation allows extremely low latency CPU access for Above UI threads
 * while supporting the use case of high priority timeshare threads contending with
 * lower priority fixed priority threads.
 */
static boolean_t
sched_clutch_root_select_aboveui(
	sched_clutch_root_t root_clutch)
{
	if (bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_FIXPRI)) {
		sched_clutch_root_bucket_t root_bucket_aboveui = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
		sched_clutch_root_bucket_t root_bucket_sharefg = &root_clutch->scr_buckets[TH_BUCKET_SHARE_FG];

		if (!bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_SHARE_FG)) {
			/* If the timeshare FG bucket is not runnable, pick the aboveUI bucket for scheduling */
			return true;
		}
		if (sched_clutch_root_bucket_pri_compare(root_bucket_aboveui, root_bucket_sharefg) >= 0) {
			/* If the aboveUI bucket has a higher native clutch bucket priority, schedule it */
			return true;
		}
	}
	return false;
}
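
/*
 * Editorial example: with an AboveUI clutch bucket at priority 55 and a
 * runnable FG timeshare clutch bucket whose computed priority is 52, the
 * AboveUI bucket wins the comparison and is scheduled immediately; if the FG
 * bucket later climbs above 55 (e.g. via its interactivity boost), this
 * routine returns false and selection falls through to the deadline based
 * path. The priority values here are hypothetical, chosen only to illustrate
 * the comparison.
 */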

/*
 * sched_clutch_root_highest_root_bucket()
 *
 * Main routine to find the highest runnable root level bucket.
 * This routine is called from performance sensitive contexts, so it is
 * crucial to keep this O(1).
 */
static sched_clutch_root_bucket_t
sched_clutch_root_highest_root_bucket(
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		return NULL;
	}

	if (sched_clutch_root_select_aboveui(root_clutch)) {
		return &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	}

	/*
	 * The Above UI bucket is not runnable or has a low priority clutch bucket; use the earliest deadline model
	 * to schedule threads. The idea is that as the timeshare buckets use CPU, they will drop their
	 * interactivity score and allow low priority AboveUI clutch buckets to be scheduled.
	 */

	/* Find the earliest deadline bucket */
	sched_clutch_root_bucket_t edf_bucket = priority_queue_min(&root_clutch->scr_root_buckets, struct sched_clutch_root_bucket, scrb_pqlink);

	sched_clutch_root_bucket_t warp_bucket = NULL;
	int warp_bucket_index = -1;
evaluate_warp_buckets:
	/* Check if any higher runnable buckets have warp available */
	warp_bucket_index = bitmap_lsb_first(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);

	if ((warp_bucket_index == -1) || (warp_bucket_index >= edf_bucket->scrb_bucket)) {
		/* No higher buckets have warp available; choose the edf bucket and replenish its warp */
		sched_clutch_root_bucket_deadline_update(edf_bucket, root_clutch, timestamp);
		edf_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[edf_bucket->scrb_bucket];
		edf_bucket->scrb_warped_deadline = SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED;
		bitmap_set(root_clutch->scr_warp_available, edf_bucket->scrb_bucket);
		return edf_bucket;
	}

	/*
	 * Looks like there is a root bucket which is higher in the natural priority
	 * order than edf_bucket and might have some warp remaining.
	 */
	warp_bucket = &root_clutch->scr_buckets[warp_bucket_index];
	if (warp_bucket->scrb_warped_deadline == SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/* Root bucket has not used any of its warp; set a deadline to expire its warp and return it */
		warp_bucket->scrb_warped_deadline = timestamp + warp_bucket->scrb_warp_remaining;
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}
	if (warp_bucket->scrb_warped_deadline > timestamp) {
		/* Root bucket already has a warp window open with some warp remaining */
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}

	/*
	 * For this bucket, the warp window was opened sometime in the past but has now
	 * expired. Mark the bucket as not available for warp anymore and re-run the
	 * warp bucket selection logic.
	 */
	warp_bucket->scrb_warp_remaining = 0;
	bitmap_clear(root_clutch->scr_warp_available, warp_bucket->scrb_bucket);
	goto evaluate_warp_buckets;
}
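
/*
 * Editorial summary of the selection order above: (1) if the AboveUI bucket
 * wins the special-case check, return it; (2) otherwise take the earliest
 * deadline bucket; (3) if a bucket that is higher in natural priority order
 * still has warp available, let it run inside its warp window instead;
 * (4) once that window expires, clear its warp and re-evaluate.
 */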

/*
 * sched_clutch_root_bucket_deadline_calculate()
 *
 * Calculate the deadline for the bucket based on its WCEL
 */
static uint64_t
sched_clutch_root_bucket_deadline_calculate(
	sched_clutch_root_bucket_t root_bucket,
	uint64_t timestamp)
{
	/* For fixpri AboveUI bucket always return it as the earliest deadline */
	if (root_bucket->scrb_bucket < TH_BUCKET_SHARE_FG) {
		return 0;
	}

	/* For all timeshare buckets set the deadline as current time + worst-case-execution-latency */
	return timestamp + sched_clutch_root_bucket_wcel[root_bucket->scrb_bucket];
}

/*
 * sched_clutch_root_bucket_deadline_update()
 *
 * Routine to update the deadline of the root bucket when it is selected.
 * Updating the deadline also moves the root_bucket in the EDF priority
 * queue.
 */
static void
sched_clutch_root_bucket_deadline_update(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* The algorithm never uses the deadlines for scheduling TH_BUCKET_FIXPRI bucket */
		return;
	}
	uint64_t old_deadline = root_bucket->scrb_deadline;
	uint64_t new_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	assert(old_deadline <= new_deadline);
	if (old_deadline != new_deadline) {
		root_bucket->scrb_deadline = new_deadline;
		/* Since the priority queue is a min-heap, use the decrease routine even though the deadline has a larger value now */
		priority_queue_entry_decrease(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);
	}
}

/*
 * sched_clutch_root_bucket_runnable()
 *
 * Routine to insert a newly runnable root bucket into the hierarchy.
 * Also updates the deadline and warp parameters as necessary.
 */
static void
sched_clutch_root_bucket_runnable(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	/* Mark the root bucket as runnable */
	bitmap_set(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, 0, 0, 0);

	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */
		return;
	}

	root_bucket->scrb_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	priority_queue_insert(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);

	if (root_bucket->scrb_warp_remaining) {
		/* Since the bucket has some warp remaining and it is now runnable, mark it as available for warp */
		bitmap_set(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	}
}

/*
 * sched_clutch_root_bucket_empty()
 *
 * Routine to remove an empty root bucket from the hierarchy.
 * Also updates the deadline and warp parameters as necessary.
 */
static void
sched_clutch_root_bucket_empty(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	bitmap_clear(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0, 0);

	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the TH_BUCKET_FIXPRI bucket is not scheduled based on deadline, nothing more needed here */
		return;
	}

	priority_queue_remove(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, sched_clutch_root_bucket_compare);

	bitmap_clear(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	if (root_bucket->scrb_warped_deadline > timestamp) {
		/*
		 * For root buckets that were using the warp, check if the warp
		 * deadline is in the future. If yes, remove the wall time the
		 * warp was active and update the warp remaining. This allows
		 * the root bucket to use the remaining warp the next time it
		 * becomes runnable.
		 */
		root_bucket->scrb_warp_remaining = root_bucket->scrb_warped_deadline - timestamp;
	} else if (root_bucket->scrb_warped_deadline != SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/*
		 * If the root bucket's warped deadline is in the past, it has used up
		 * all the warp it was assigned. Empty out its warp remaining.
		 */
		root_bucket->scrb_warp_remaining = 0;
	}
}

/*
 * sched_clutch_root_pri_update()
 *
 * The root level priority is used for thread selection and preemption
 * logic.
 */
static void
sched_clutch_root_pri_update(
	sched_clutch_root_t root_clutch)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		/* No runnable root buckets */
		root_clutch->scr_priority = NOPRI;
		assert(root_clutch->scr_urgency == 0);
		return;
	}
	sched_clutch_root_bucket_t root_bucket = NULL;
	/* Special case for AboveUI (uses same logic as thread selection) */
	if (sched_clutch_root_select_aboveui(root_clutch)) {
		root_bucket = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	} else {
		/*
		 * The AboveUI bucket is not runnable or has a low clutch bucket priority;
		 * select the next runnable root bucket in natural priority order. This logic
		 * is slightly different from thread selection, because thread selection
		 * considers deadlines, warps etc. to decide the most optimal bucket at a
		 * given timestamp. Since the priority value is used for preemption decisions
		 * only, it needs to be based on the highest runnable thread available in
		 * the timeshare domain.
		 */
		int root_bucket_index = bitmap_lsb_next(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX, TH_BUCKET_FIXPRI);
		assert(root_bucket_index != -1);
		root_bucket = &root_clutch->scr_buckets[root_bucket_index];
	}
	/* For the selected root bucket, find the highest priority clutch bucket */
	sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket);
	root_clutch->scr_priority = priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);
}

/*
 * sched_clutch_root_urgency_inc()
 *
 * Routine to increment the urgency at the root level based on the thread
 * priority that is being inserted into the hierarchy. The root urgency
 * counter is updated based on the urgency of threads in any of the
 * clutch buckets which are part of the hierarchy.
 *
 * Always called with the pset lock held.
 */
static void
sched_clutch_root_urgency_inc(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
		root_clutch->scr_urgency++;
	}
}

/*
 * sched_clutch_root_urgency_dec()
 *
 * Routine to decrement the urgency at the root level based on the thread
 * priority that is being removed from the hierarchy. The root urgency
 * counter is updated based on the urgency of threads in any of the
 * clutch buckets which are part of the hierarchy.
 *
 * Always called with the pset lock held.
 */
static void
sched_clutch_root_urgency_dec(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
		root_clutch->scr_urgency--;
	}
}

/*
 * Clutch bucket level scheduling
 *
 * The second level of scheduling is the clutch bucket level scheduling
 * which tries to schedule thread groups within root_buckets. Each
 * clutch represents a thread group and a clutch_bucket represents
 * threads at a particular sched_bucket within that thread group. The
 * goal of this level of scheduling is to allow interactive thread
 * groups low latency access to the CPU. It also provides slight
 * scheduling preference for App and unrestricted thread groups.
 *
 * The clutch bucket scheduling algorithm measures an interactivity
 * score for all clutch buckets. The interactivity score is based
 * on the ratio of the CPU used and the voluntary blocking of threads
 * within the clutch bucket. The algorithm is very close to the ULE
 * scheduler on FreeBSD in terms of calculations. The interactivity
 * score provides an interactivity boost in the range of
 * [0:SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI * 2] which allows interactive
 * thread groups to win over CPU spinners.
 */
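
/*
 * Editorial illustration of the hierarchy: for a hypothetical thread group
 * TG_A with runnable IN and UT threads, the pset hierarchy contains
 * root -> root_bucket[IN] -> clutch_bucket(TG_A, IN) -> threads and
 * root -> root_bucket[UT] -> clutch_bucket(TG_A, UT) -> threads. Selection
 * picks a root bucket (EDF + warp), then the highest priority clutch bucket
 * within it, then a thread from that clutch bucket's run queue.
 */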

/* Priority boost range for interactivity */
#define SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT     (8)
uint8_t sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT;

/* Window to scale the cpu usage and blocked values (currently 500ms). It is the threshold of used + blocked */
uint64_t sched_clutch_bucket_adjust_threshold = 0;
#define SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS      (500000)

/* The ratio to scale the cpu/blocked time per window */
#define SCHED_CLUTCH_BUCKET_ADJUST_RATIO                (10)

/* Rate at which the interactivity score is recalculated. This keeps the score smooth in terms of extremely bursty behavior */
uint64_t sched_clutch_bucket_interactivity_delta = 0;
#define SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT   (25000)

/*
 * In order to allow App thread groups some preference over daemon thread
 * groups, the App clutch_buckets get an 8 point boost. The boost value should
 * be chosen such that badly behaved apps are still penalized over well
 * behaved interactive daemon clutch_buckets.
 */
#define SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT           (8)
uint8_t sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT;

/* Initial value for voluntary blocking time for the clutch_bucket */
#define SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID  (uint32_t)(~0)

/*
 * sched_clutch_bucket_init()
 *
 * Initializer for clutch buckets.
 */
static void
sched_clutch_bucket_init(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	bzero(clutch_bucket, sizeof(struct sched_clutch_bucket));

	clutch_bucket->scb_bucket = bucket;
	/* scb_priority will be recalculated when a thread is inserted in the clutch bucket */
	clutch_bucket->scb_priority = 0;
	/*
	 * All thread groups should be initialized to be interactive; this allows the newly launched
	 * thread groups to fairly compete with already running thread groups.
	 */
	clutch_bucket->scb_interactivity_score = (sched_clutch_bucket_interactive_pri * 2);
	clutch_bucket->scb_foreign = false;

	os_atomic_store(&clutch_bucket->scb_timeshare_tick, 0, relaxed);
	os_atomic_store(&clutch_bucket->scb_pri_shift, INT8_MAX, relaxed);

	clutch_bucket->scb_interactivity_ts = 0;
	clutch_bucket->scb_blocked_ts = SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID;
	priority_queue_entry_init(&clutch_bucket->scb_pqlink);
	clutch_bucket->scb_clutch = clutch;
	clutch_bucket->scb_root = NULL;
	priority_queue_init(&clutch_bucket->scb_clutchpri_prioq, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
	run_queue_init(&clutch_bucket->scb_runq);
}

/*
 * sched_clutch_init_with_thread_group()
 *
 * Initialize the sched_clutch when the thread group is being created
 */
void
sched_clutch_init_with_thread_group(
	sched_clutch_t clutch,
	struct thread_group *tg)
{
	os_atomic_store(&clutch->sc_thr_count, 0, relaxed);

	/* Initialize all the clutch buckets */
	for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		sched_clutch_bucket_init(&(clutch->sc_clutch_buckets[i]), clutch, i);
	}

	/* Grouping specific fields */
	clutch->sc_tg = tg;
	os_atomic_store(&clutch->sc_tg_priority, 0, relaxed);
}

/*
 * sched_clutch_destroy()
 *
 * Destructor for clutch; called from thread group release code.
 */
void
sched_clutch_destroy(
	__unused sched_clutch_t clutch)
{
	assert(os_atomic_load(&clutch->sc_thr_count, relaxed) == 0);
}

#if __AMP__

/*
 * sched_clutch_bucket_foreign()
 *
 * Identifies if the clutch bucket is foreign to (i.e. not recommended for) this
 * hierarchy. This is possible when the recommended hierarchy/pset is not
 * currently available for scheduling.
 */
static boolean_t
sched_clutch_bucket_foreign(sched_clutch_root_t root_clutch, sched_clutch_bucket_t clutch_bucket)
{
	assert(clutch_bucket->scb_thr_count > 0);
	if (!sched_clutch_pset_available(root_clutch->scr_pset)) {
		/*
		 * Even though the pset was not available for scheduling, threads
		 * are being put in its runq (this might be due to the other pset
		 * being turned off and this being the master processor pset).
		 * Mark the clutch bucket as foreign so that when the other
		 * pset becomes available, it moves the clutch bucket accordingly.
		 */
		return true;
	}
	thread_t thread = run_queue_peek(&clutch_bucket->scb_runq);
	pset_cluster_type_t pset_type = recommended_pset_type(thread);
	return pset_type != root_clutch->scr_pset->pset_cluster_type;
}

#endif /* __AMP__ */

/*
 * sched_clutch_bucket_hierarchy_insert()
 *
 * Routine to insert a newly runnable clutch_bucket into the root hierarchy.
 */
static void
sched_clutch_bucket_hierarchy_insert(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		/* Enqueue the timeshare clutch buckets into the global runnable clutch_bucket list; used for sched tick operations */
		enqueue_tail(&root_clutch->scr_clutch_buckets, &clutch_bucket->scb_listlink);
	}
#if __AMP__
	/* Check if the bucket is a foreign clutch bucket and add it to the foreign buckets list */
	if (sched_clutch_bucket_foreign(root_clutch, clutch_bucket)) {
		clutch_bucket->scb_foreign = true;
		enqueue_tail(&root_clutch->scr_foreign_buckets, &clutch_bucket->scb_foreignlink);
	}
#endif /* __AMP__ */
	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];

	/* If this is the first clutch bucket in the root bucket, insert the root bucket into the root priority queue */
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_runnable(root_bucket, root_clutch, timestamp);
	}

	/* Insert the clutch bucket into the root bucket priority queue */
	priority_queue_insert(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	os_atomic_store(&clutch_bucket->scb_root, root_clutch, relaxed);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, clutch_bucket->scb_priority, 0);
}

/*
 * sched_clutch_bucket_hierarchy_remove()
 *
 * Routine to remove an empty clutch bucket from the root hierarchy.
 */
static void
sched_clutch_bucket_hierarchy_remove(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		/* Remove the timeshare clutch bucket from the globally runnable clutch_bucket list */
		remqueue(&clutch_bucket->scb_listlink);
	}
#if __AMP__
	if (clutch_bucket->scb_foreign) {
		clutch_bucket->scb_foreign = false;
		remqueue(&clutch_bucket->scb_foreignlink);
	}
#endif /* __AMP__ */

	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];

	/* Remove the clutch bucket from the root bucket priority queue */
	priority_queue_remove(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	os_atomic_store(&clutch_bucket->scb_root, NULL, relaxed);
	clutch_bucket->scb_blocked_ts = timestamp;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0);

	/* If the root bucket priority queue is now empty, remove it from the root priority queue */
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_empty(root_bucket, root_clutch, timestamp);
	}
}

/*
 * sched_clutch_bucket_base_pri()
 *
 * Calculates the "base" priority of the clutch bucket. The base
 * priority of the clutch bucket is the sum of the max of highest
 * base_pri and highest sched_pri in the clutch bucket and any
 * grouping specific (App/Daemon...) boosts applicable to the
 * clutch_bucket.
 */
static uint8_t
sched_clutch_bucket_base_pri(
	sched_clutch_bucket_t clutch_bucket)
{
	uint8_t clutch_boost = 0;
	assert(clutch_bucket->scb_runq.count != 0);

	sched_clutch_t clutch = clutch_bucket->scb_clutch;

	/*
	 * Since the clutch bucket can contain threads that are members of the group due
	 * to the sched_pri being promoted or due to their base pri, the base priority of
	 * the entire clutch bucket should be based on the highest thread (promoted or base)
	 * in the clutch bucket.
	 */
	uint8_t max_pri = priority_queue_empty(&clutch_bucket->scb_clutchpri_prioq) ? 0 : priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);

	/*
	 * For all AboveUI clutch buckets and clutch buckets for thread groups that
	 * haven't been specified as SCHED_CLUTCH_TG_PRI_LOW, give a priority boost
	 */
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (os_atomic_load(&clutch->sc_tg_priority, relaxed) != SCHED_CLUTCH_TG_PRI_LOW)) {
		clutch_boost = sched_clutch_bucket_pri_boost;
	}
	return max_pri + clutch_boost;
}
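
/*
 * Editorial example: if the highest thread in a clutch bucket has priority 31
 * and the owning thread group is not marked SCHED_CLUTCH_TG_PRI_LOW, the
 * bucket's base priority is 31 + sched_clutch_bucket_pri_boost (8 by default)
 * = 39. The thread priority here is a hypothetical value for illustration.
 */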

/*
 * sched_clutch_bucket_interactivity_score_calculate()
 *
 * Routine to calculate the interactivity score for the clutch bucket. The
 * interactivity score is based on the ratio of CPU used by all threads in
 * the bucket and the blocked time of the bucket as a whole.
 */
static uint8_t
sched_clutch_bucket_interactivity_score_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		/*
		 * Since the root bucket selection algorithm for Above UI looks at clutch bucket
		 * priorities, make sure all AboveUI buckets are marked interactive.
		 */
		assert(clutch_bucket->scb_interactivity_score == (2 * sched_clutch_bucket_interactive_pri));
		return clutch_bucket->scb_interactivity_score;
	}

	if (clutch_bucket->scb_interactivity_ts == 0) {
		/*
		 * This indicates a newly initialized clutch bucket; return the default interactivity score
		 * and update timestamp.
		 */
		clutch_bucket->scb_interactivity_ts = timestamp;
		return clutch_bucket->scb_interactivity_score;
	}

	if (timestamp < (clutch_bucket->scb_interactivity_ts + sched_clutch_bucket_interactivity_delta)) {
		return clutch_bucket->scb_interactivity_score;
	}

	/* Check if the clutch bucket accounting needs to be scaled */
	sched_clutch_bucket_cpu_adjust(clutch_bucket);
	clutch_bucket->scb_interactivity_ts = timestamp;

	sched_clutch_bucket_cpu_data_t scb_cpu_data;
	scb_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
	clutch_cpu_data_t cpu_used = scb_cpu_data.cpu_data.scbcd_cpu_used;
	clutch_cpu_data_t cpu_blocked = scb_cpu_data.cpu_data.scbcd_cpu_blocked;

	/*
	 * In extremely CPU contended cases, it is possible that the clutch bucket has been runnable
	 * for a long time but none of its threads have been picked up for execution. In that case, both
	 * the CPU used and blocked would be 0.
	 */
	if ((cpu_blocked == 0) && (cpu_used == 0)) {
		return clutch_bucket->scb_interactivity_score;
	}

	/*
	 * For all timeshare buckets, calculate the interactivity score of the bucket
	 * and add it to the base priority
	 */
	uint8_t interactive_score = 0;
	if (cpu_blocked > cpu_used) {
		/* Interactive clutch_bucket case */
		interactive_score = sched_clutch_bucket_interactive_pri +
		    ((sched_clutch_bucket_interactive_pri * (cpu_blocked - cpu_used)) / cpu_blocked);
	} else {
		/* Non-interactive clutch_bucket case */
		interactive_score = ((sched_clutch_bucket_interactive_pri * cpu_blocked) / cpu_used);
	}
	clutch_bucket->scb_interactivity_score = interactive_score;
	return interactive_score;
}
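
/*
 * Editorial worked example, using the default sched_clutch_bucket_interactive_pri
 * of 8: a bucket that blocked for 30 units and used 10 units of CPU is
 * interactive and scores 8 + (8 * (30 - 10)) / 30 = 13, while a bucket that
 * used 40 units and blocked for 10 scores (8 * 10) / 40 = 2. The resulting
 * score is added to the bucket's base priority in
 * sched_clutch_bucket_pri_calculate() below.
 */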

/*
 * sched_clutch_bucket_pri_calculate()
 *
 * The priority calculation algorithm for the clutch_bucket is a slight
 * modification on the ULE interactivity score. It uses the base priority
 * of the clutch bucket and applies an interactivity score boost to the
 * highly responsive clutch buckets.
 */
static uint8_t
sched_clutch_bucket_pri_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	/* For empty clutch buckets, return priority 0 */
	if (clutch_bucket->scb_thr_count == 0) {
		return 0;
	}

	uint8_t base_pri = sched_clutch_bucket_base_pri(clutch_bucket);
	uint8_t interactive_score = sched_clutch_bucket_interactivity_score_calculate(clutch_bucket, timestamp);

	assert(((uint64_t)base_pri + interactive_score) <= UINT8_MAX);
	uint8_t pri = base_pri + interactive_score;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_PRI) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, pri, interactive_score, 0);
	return pri;
}

/*
 * sched_clutch_root_bucket_highest_clutch_bucket()
 *
 * Routine to find the highest priority clutch bucket
 * within the root bucket.
 */
static sched_clutch_bucket_t
sched_clutch_root_bucket_highest_clutch_bucket(
	sched_clutch_root_bucket_t root_bucket)
{
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		return NULL;
	}
	return priority_queue_max(&root_bucket->scrb_clutch_buckets, struct sched_clutch_bucket, scb_pqlink);
}

/*
 * sched_clutch_bucket_runnable()
 *
 * Perform all operations needed when a new clutch bucket becomes runnable.
 * It involves inserting the clutch_bucket into the hierarchy and updating the
 * root priority appropriately.
 */
static boolean_t
sched_clutch_bucket_runnable(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_bucket_cpu_blocked_update(clutch_bucket, timestamp);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
	/* Update the timesharing properties of this clutch_bucket; also done every sched_tick */
	sched_clutch_bucket_timeshare_update(clutch_bucket);
	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}

/*
 * sched_clutch_bucket_update()
 *
 * Update the clutch_bucket's position in the hierarchy based on whether
 * the newly runnable thread changes its priority. Also update the root
 * priority accordingly.
 */
static boolean_t
sched_clutch_bucket_update(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	uint64_t new_pri = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	if (new_pri == clutch_bucket->scb_priority) {
		return false;
	}
	struct priority_queue *bucket_prioq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;

	if (new_pri < clutch_bucket->scb_priority) {
		clutch_bucket->scb_priority = new_pri;
		priority_queue_entry_decrease(bucket_prioq, &clutch_bucket->scb_pqlink,
		    clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	} else {
		clutch_bucket->scb_priority = new_pri;
		priority_queue_entry_increase(bucket_prioq, &clutch_bucket->scb_pqlink,
		    clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	}

	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}

/*
 * sched_clutch_bucket_empty()
 *
 * Perform all the operations needed when a clutch_bucket is no longer runnable.
 * It involves removing the clutch bucket from the hierarchy and updating the root
 * priority appropriately.
 */
static void
sched_clutch_bucket_empty(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_root_pri_update(root_clutch);
}

/*
 * sched_clutch_cpu_usage_update()
 *
 * Routine to update CPU usage of the thread in the hierarchy.
 */
void
sched_clutch_cpu_usage_update(
	thread_t thread,
	uint64_t delta)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
	sched_clutch_bucket_cpu_usage_update(clutch_bucket, delta);
}

/*
 * sched_clutch_bucket_cpu_usage_update()
 *
 * Routine to update the CPU usage of the clutch_bucket.
 */
static void
sched_clutch_bucket_cpu_usage_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t delta)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		/* Since the Above UI bucket always has the maximum interactivity score, nothing to do here */
		return;
	}

	/*
	 * The CPU usage should not overflow the clutch_cpu_data_t type. Since the usage is used to
	 * calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX.
	 */
	delta = MIN(delta, CLUTCH_CPU_DATA_MAX);
	os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_used), (clutch_cpu_data_t)delta, relaxed);
}

/*
 * sched_clutch_bucket_cpu_blocked_update()
 *
 * Routine to update CPU blocked time for clutch_bucket.
 */
static void
sched_clutch_bucket_cpu_blocked_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (clutch_bucket->scb_blocked_ts == SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID)) {
		/* For the Above UI bucket and a newly initialized clutch bucket, nothing to do here */
		return;
	}

	uint64_t blocked_time = timestamp - clutch_bucket->scb_blocked_ts;
	if (blocked_time > sched_clutch_bucket_adjust_threshold) {
		blocked_time = sched_clutch_bucket_adjust_threshold;
	}

	/*
	 * The CPU blocked time should not overflow the clutch_cpu_data_t type. Since the blocked time
	 * is used to calculate the interactivity score, it is safe to restrict it to CLUTCH_CPU_DATA_MAX.
	 */
	blocked_time = MIN(blocked_time, CLUTCH_CPU_DATA_MAX);
	clutch_cpu_data_t __assert_only cpu_blocked_orig = os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_blocked), (clutch_cpu_data_t)blocked_time, relaxed);
	/* The blocked time is scaled every so often; it should never overflow */
	assert(blocked_time <= (CLUTCH_CPU_DATA_MAX - cpu_blocked_orig));
}

/*
 * sched_clutch_bucket_cpu_adjust()
 *
 * Routine to scale the cpu usage and blocked time once the sum gets bigger
 * than sched_clutch_bucket_adjust_threshold. Allows the values to remain
 * manageable and maintain the same ratio while allowing clutch buckets to
 * adjust behavior and reflect in the interactivity score in a reasonable
 * amount of time.
 */
static void
sched_clutch_bucket_cpu_adjust(
	sched_clutch_bucket_t clutch_bucket)
{
	sched_clutch_bucket_cpu_data_t old_cpu_data = {};
	sched_clutch_bucket_cpu_data_t new_cpu_data = {};
	do {
		old_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
		clutch_cpu_data_t cpu_used = old_cpu_data.cpu_data.scbcd_cpu_used;
		clutch_cpu_data_t cpu_blocked = old_cpu_data.cpu_data.scbcd_cpu_blocked;
		if ((cpu_used + cpu_blocked) < sched_clutch_bucket_adjust_threshold) {
			return;
		}

		/*
		 * The accumulation of CPU used and blocked is past the threshold; scale it
		 * down to lose old history.
		 */
		new_cpu_data.cpu_data.scbcd_cpu_used = cpu_used / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
		new_cpu_data.cpu_data.scbcd_cpu_blocked = cpu_blocked / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
	} while (!os_atomic_cmpxchg(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, old_cpu_data.scbcd_cpu_data_packed, new_cpu_data.scbcd_cpu_data_packed, relaxed));
}
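
/*
 * Editorial example: once (used + blocked) crosses the adjust threshold, both
 * values are divided by SCHED_CLUTCH_BUCKET_ADJUST_RATIO (10). For instance,
 * roughly 400ms of CPU used and 150ms of blocked time become 40ms / 15ms of
 * retained history: the interactivity ratio is preserved while newer samples
 * quickly dominate the next score calculation.
 */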

/*
 * Thread level scheduling algorithm
 *
 * The thread level scheduling algorithm uses the mach timeshare
 * decay based algorithm to achieve sharing between threads within the
 * same clutch bucket. The load/priority shifts etc. are all maintained
 * at the clutch bucket level and used for decay calculation of the
 * threads. The load sampling is still driven off the scheduler tick
 * for runnable clutch buckets (it does not use the new higher frequency
 * EWMA based load calculation). The idea is that the contention and load
 * within clutch_buckets should be limited enough to not see heavy decay
 * and timeshare effectively.
 */
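
/*
 * Editorial sketch (based on the standard Mach timeshare model, not spelled
 * out in this file): the scb_pri_shift computed for a clutch bucket feeds the
 * per-thread decay, i.e. a thread's effective priority is depressed roughly
 * by (its accumulated CPU usage >> scb_pri_shift). Heavier load in a clutch
 * bucket yields a smaller shift, so CPU-heavy threads in a busy bucket decay
 * faster relative to their peers in the same bucket.
 */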

/*
 * sched_clutch_thread_run_bucket_incr() / sched_clutch_run_bucket_incr()
 *
 * Increment the run count for the clutch bucket associated with the
 * thread.
 */
uint32_t
sched_clutch_thread_run_bucket_incr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return 0;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	return sched_clutch_run_bucket_incr(clutch, bucket);
}

static uint32_t
sched_clutch_run_bucket_incr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]);
	uint32_t result = os_atomic_inc(&(clutch_bucket->scb_run_count), relaxed);
	return result;
}

/*
 * sched_clutch_thread_run_bucket_decr() / sched_clutch_run_bucket_decr()
 *
 * Decrement the run count for the clutch bucket associated with the
 * thread.
 */
uint32_t
sched_clutch_thread_run_bucket_decr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return 0;
	}
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	return sched_clutch_run_bucket_decr(clutch, bucket);
}

static uint32_t
sched_clutch_run_bucket_decr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]);
	uint32_t result = os_atomic_dec(&(clutch_bucket->scb_run_count), relaxed);
	return result;
}

1293 | /* | |
1294 | * sched_clutch_bucket_timeshare_update() | |
1295 | * | |
1296 | * Routine to update the load and priority shift for the clutch_bucket every | |
1297 | * sched_tick. For runnable clutch_buckets, the sched tick handling code | |
1298 | * iterates the clutch buckets and calls this routine. For all others, the | |
1299 | * clutch_bucket maintains a "last updated schedtick" parameter. As threads | |
1300 | * become runnable in the clutch bucket, if this value is outdated, the load | |
1301 | * and shifts are updated. | |
1302 | * | |
1303 | * Possible optimization: | |
1304 | * - The current algorithm samples the load every sched tick (125ms). | |
1305 | * This is prone to spikes in runnable counts; if that turns out to be | |
1306 | * a problem, a simple solution would be to do the EWMA trick to sample | |
1307 | * load at every load_tick (30ms) and use the averaged value for the pri | |
1308 | * shift calculation. | |
1309 | */ | |
1310 | static void | |
1311 | sched_clutch_bucket_timeshare_update( | |
1312 | sched_clutch_bucket_t clutch_bucket) | |
1313 | { | |
1314 | if (clutch_bucket->scb_bucket < TH_BUCKET_SHARE_FG) { | |
1315 | return; | |
1316 | } | |
1317 | ||
1318 | /* | |
1319 | * Update the timeshare parameters for the clutch bucket if they haven't been updated | |
1320 | * in this tick. | |
1321 | */ | |
1322 | uint32_t bucket_sched_ts = os_atomic_load(&clutch_bucket->scb_timeshare_tick, relaxed); | |
1323 | uint32_t current_sched_ts = sched_tick; | |
1324 | if (bucket_sched_ts != current_sched_ts) { | |
1325 | os_atomic_store(&clutch_bucket->scb_timeshare_tick, current_sched_ts, relaxed); | |
1326 | uint32_t bucket_load = (os_atomic_load(&clutch_bucket->scb_run_count, relaxed) / processor_avail_count); | |
1327 | bucket_load = MIN(bucket_load, NRQS - 1); | |
1328 | uint32_t pri_shift = sched_fixed_shift - sched_load_shifts[bucket_load]; | |
1329 | os_atomic_store(&clutch_bucket->scb_pri_shift, pri_shift, relaxed); | |
1330 | } | |
1331 | } | |
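/*
 * Worked example for the update above (numbers are hypothetical): with 40 runnable
 * threads in the clutch bucket and 4 available processors,
 *
 *   uint32_t bucket_load = 40 / 4;                                    // = 10, clamped to NRQS - 1
 *   uint32_t pri_shift   = sched_fixed_shift - sched_load_shifts[10];
 *
 * sched_load_shifts[] grows with load, so a higher sampled load yields a smaller
 * pri_shift and therefore faster decay for threads in this bucket.
 */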
1332 | ||
1333 | /* | |
1334 | * sched_clutch_thread_clutch_update() | |
1335 | * | |
1336 | * Routine called when the thread changes its thread group. The current | |
1337 | * implementation relies on the fact that the thread group is changed only | |
1338 | * from the context of the thread itself. Due to this fact, the thread | |
1339 | * group change causes only counter updates in the old & new clutch | |
1340 | * buckets and no hierarchy changes. The routine also attributes the CPU | |
1341 | * used so far to the old clutch. | |
1342 | */ | |
1343 | void | |
1344 | sched_clutch_thread_clutch_update( | |
1345 | thread_t thread, | |
1346 | sched_clutch_t old_clutch, | |
1347 | sched_clutch_t new_clutch) | |
1348 | { | |
1349 | uint32_t cpu_delta; | |
1350 | assert(current_thread() == thread); | |
1351 | ||
1352 | if (old_clutch) { | |
1353 | sched_clutch_run_bucket_decr(old_clutch, thread->th_sched_bucket); | |
1354 | /* | |
1355 | * Calculate the CPU used by this thread in the old bucket and | |
1356 | * add it to the old clutch bucket. This uses the same CPU usage | |
1357 | * logic as update_priority etc. | |
1358 | */ | |
1359 | thread_timer_delta(thread, cpu_delta); | |
1360 | if (thread->pri_shift < INT8_MAX) { | |
1361 | thread->sched_usage += cpu_delta; | |
1362 | } | |
1363 | thread->cpu_delta += cpu_delta; | |
1364 | sched_clutch_bucket_cpu_usage_update(&(old_clutch->sc_clutch_buckets[thread->th_sched_bucket]), cpu_delta); | |
1365 | } | |
1366 | ||
1367 | if (new_clutch) { | |
1368 | sched_clutch_run_bucket_incr(new_clutch, thread->th_sched_bucket); | |
1369 | } | |
1370 | } | |
1371 | ||
1372 | /* Thread Insertion/Removal/Selection routines */ | |
1373 | ||
1374 | /* | |
1375 | * sched_clutch_thread_insert() | |
1376 | * | |
1377 | * Routine to insert a thread into the sched clutch hierarchy. | |
1378 | * Update the counts at all levels of the hierarchy and insert the nodes | |
1379 | * as they become runnable. Always called with the pset lock held. | |
1380 | */ | |
1381 | static boolean_t | |
1382 | sched_clutch_thread_insert( | |
1383 | sched_clutch_root_t root_clutch, | |
1384 | thread_t thread, | |
1385 | integer_t options) | |
1386 | { | |
1387 | boolean_t result = FALSE; | |
1388 | ||
1389 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1390 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1391 | assert(thread->thread_group == clutch->sc_tg); | |
1392 | ||
1393 | uint64_t current_timestamp = mach_absolute_time(); | |
1394 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
1395 | assert((clutch_bucket->scb_root == NULL) || (clutch_bucket->scb_root == root_clutch)); | |
1396 | ||
1397 | /* Insert thread into the clutch_bucket runq using sched_pri */ | |
1398 | run_queue_enqueue(&clutch_bucket->scb_runq, thread, options); | |
1399 | /* Increment the urgency counter for the root if necessary */ | |
1400 | sched_clutch_root_urgency_inc(root_clutch, thread); | |
1401 | ||
1402 | /* Insert thread into clutch_bucket priority queue based on the promoted or base priority */ | |
1403 | priority_queue_insert(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link, | |
1404 | sched_thread_sched_pri_promoted(thread) ? thread->sched_pri : thread->base_pri, | |
1405 | PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE); | |
1406 | os_atomic_inc(&clutch->sc_thr_count, relaxed); | |
1407 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE, | |
1408 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_RUNNABLE, 0); | |
1409 | ||
1410 | /* Enqueue the clutch into the hierarchy (if needed) and update properties */ | |
1411 | if (clutch_bucket->scb_thr_count == 0) { | |
1412 | sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count); | |
1413 | sched_clutch_thr_count_inc(&root_clutch->scr_thr_count); | |
1414 | /* Insert the newly runnable clutch bucket into the hierarchy */ | |
1415 | result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp); | |
1416 | } else { | |
1417 | sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count); | |
1418 | sched_clutch_thr_count_inc(&root_clutch->scr_thr_count); | |
1419 | /* Update the position of the clutch bucket in the hierarchy */ | |
1420 | result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp); | |
1421 | } | |
1422 | return result; | |
1423 | } | |
1424 | ||
1425 | /* | |
1426 | * sched_clutch_thread_remove() | |
1427 | * | |
1428 | * Routine to remove a thread from the sched clutch hierarchy. | |
1429 | * Update the counts at all levels of the hierarchy and remove the nodes | |
1430 | * as they become empty. Always called with the pset lock held. | |
1431 | */ | |
1432 | static void | |
1433 | sched_clutch_thread_remove( | |
1434 | sched_clutch_root_t root_clutch, | |
1435 | thread_t thread, | |
1436 | uint64_t current_timestamp) | |
1437 | { | |
1438 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1439 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1440 | assert(thread->thread_group == clutch->sc_tg); | |
1441 | assert(thread->runq != PROCESSOR_NULL); | |
1442 | ||
1443 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
1444 | assert(clutch_bucket->scb_root == root_clutch); | |
1445 | ||
1446 | /* Decrement the urgency counter for the root if necessary */ | |
1447 | sched_clutch_root_urgency_dec(root_clutch, thread); | |
1448 | /* Remove thread from the clutch_bucket */ | |
1449 | run_queue_remove(&clutch_bucket->scb_runq, thread); | |
1450 | ||
1451 | priority_queue_remove(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link, | |
1452 | PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE); | |
1453 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE, | |
1454 | thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_EMPTY, 0); | |
1455 | ||
1456 | /* Update counts at various levels of the hierarchy */ | |
1457 | os_atomic_dec(&clutch->sc_thr_count, relaxed); | |
1458 | sched_clutch_thr_count_dec(&root_clutch->scr_thr_count); | |
1459 | sched_clutch_thr_count_dec(&clutch_bucket->scb_thr_count); | |
1460 | ||
1461 | /* Remove the clutch from hierarchy (if needed) and update properties */ | |
1462 | if (clutch_bucket->scb_thr_count == 0) { | |
1463 | sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp); | |
1464 | } else { | |
1465 | sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp); | |
1466 | } | |
1467 | } | |
1468 | ||
1469 | /* | |
1470 | * sched_clutch_thread_highest() | |
1471 | * | |
1472 | * Routine to find and remove the highest priority thread | |
1473 | * from the sched clutch hierarchy. The algorithm looks at the | |
1474 | * hierarchy for the most eligible runnable thread and calls | |
1475 | * sched_clutch_thread_remove(). Always called with the | |
1476 | * pset lock held. | |
1477 | */ | |
1478 | static thread_t | |
1479 | sched_clutch_thread_highest( | |
1480 | sched_clutch_root_t root_clutch) | |
1481 | { | |
1482 | sched_clutch_hierarchy_locked_assert(root_clutch); | |
1483 | uint64_t current_timestamp = mach_absolute_time(); | |
1484 | ||
1485 | /* Select the highest priority root bucket */ | |
1486 | sched_clutch_root_bucket_t root_bucket = sched_clutch_root_highest_root_bucket(root_clutch, current_timestamp); | |
1487 | if (root_bucket == NULL) { | |
1488 | return THREAD_NULL; | |
1489 | } | |
1490 | /* Since a thread is being picked from this root bucket, update its deadline */ | |
1491 | sched_clutch_root_bucket_deadline_update(root_bucket, root_clutch, current_timestamp); | |
1492 | ||
1493 | /* Find the highest priority clutch bucket in this root bucket */ | |
1494 | sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket); | |
1495 | assert(clutch_bucket != NULL); | |
1496 | ||
1497 | /* Find the highest priority runnable thread in this clutch bucket */ | |
1498 | thread_t thread = run_queue_peek(&clutch_bucket->scb_runq); | |
1499 | assert(thread != NULL); | |
1500 | ||
1501 | /* Remove and return the thread from the hierarchy */ | |
1502 | sched_clutch_thread_remove(root_clutch, thread, current_timestamp); | |
1503 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_SELECT) | DBG_FUNC_NONE, | |
1504 | thread_tid(thread), thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, 0, 0); | |
1505 | return thread; | |
1506 | } | |
1507 | ||
1508 | ||
1509 | /* High level global accessor routines */ | |
1510 | ||
1511 | /* | |
1512 | * sched_clutch_root_urgency() | |
1513 | * | |
1514 | * Routine to get the urgency of the highest runnable | |
1515 | * thread in the hierarchy. | |
1516 | */ | |
1517 | static uint32_t | |
1518 | sched_clutch_root_urgency( | |
1519 | sched_clutch_root_t root_clutch) | |
1520 | { | |
1521 | return root_clutch->scr_urgency; | |
1522 | } | |
1523 | ||
1524 | /* | |
1525 | * sched_clutch_root_count_sum() | |
1526 | * | |
1527 | * The count_sum mechanism is used for scheduler runq | |
1528 | * statistics calculation. It's only useful for debugging | |
1529 | * purposes; since it involves a mach_absolute_time() call on | |
1530 | * other scheduler implementations, it's better to avoid | |
1531 | * populating this until absolutely necessary. | |
1532 | */ | |
1533 | static uint32_t | |
1534 | sched_clutch_root_count_sum( | |
1535 | __unused sched_clutch_root_t root_clutch) | |
1536 | { | |
1537 | return 0; | |
1538 | } | |
1539 | ||
1540 | /* | |
1541 | * sched_clutch_root_priority() | |
1542 | * | |
1543 | * Routine to get the priority of the highest runnable | |
1544 | * thread in the hierarchy. | |
1545 | */ | |
1546 | static int | |
1547 | sched_clutch_root_priority( | |
1548 | sched_clutch_root_t root_clutch) | |
1549 | { | |
1550 | return root_clutch->scr_priority; | |
1551 | } | |
1552 | ||
1553 | /* | |
1554 | * sched_clutch_root_count() | |
1555 | * | |
1556 | * Returns total number of runnable threads in the hierarchy. | |
1557 | */ | |
1558 | uint32_t | |
1559 | sched_clutch_root_count( | |
1560 | sched_clutch_root_t root_clutch) | |
1561 | { | |
1562 | return root_clutch->scr_thr_count; | |
1563 | } | |
1564 | ||
1565 | /* | |
1566 | * sched_clutch_thread_pri_shift() | |
1567 | * | |
1568 | * Routine to get the priority shift value for a thread. | |
1569 | * Since the timesharing is done at the clutch_bucket level, | |
1570 | * this routine gets the clutch_bucket and retrieves the | |
1571 | * values from there. | |
1572 | */ | |
1573 | uint32_t | |
1574 | sched_clutch_thread_pri_shift( | |
1575 | thread_t thread, | |
1576 | sched_bucket_t bucket) | |
1577 | { | |
1578 | if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1579 | return UINT8_MAX; | |
1580 | } | |
1581 | assert(bucket != TH_BUCKET_RUN); | |
1582 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
1583 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[bucket]); | |
1584 | return os_atomic_load(&clutch_bucket->scb_pri_shift, relaxed); | |
1585 | } | |
1586 | ||
1587 | #pragma mark -- Clutch Scheduler Algorithm | |
1588 | ||
1589 | static void | |
1590 | sched_clutch_init(void); | |
1591 | ||
1592 | static void | |
1593 | sched_clutch_timebase_init(void); | |
1594 | ||
1595 | static thread_t | |
1596 | sched_clutch_steal_thread(processor_set_t pset); | |
1597 | ||
1598 | static void | |
1599 | sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context); | |
1600 | ||
1601 | static boolean_t | |
1602 | sched_clutch_processor_enqueue(processor_t processor, thread_t thread, | |
1603 | sched_options_t options); | |
1604 | ||
1605 | static boolean_t | |
1606 | sched_clutch_processor_queue_remove(processor_t processor, thread_t thread); | |
1607 | ||
1608 | static ast_t | |
1609 | sched_clutch_processor_csw_check(processor_t processor); | |
1610 | ||
1611 | static boolean_t | |
1612 | sched_clutch_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte); | |
1613 | ||
1614 | static int | |
1615 | sched_clutch_runq_count(processor_t processor); | |
1616 | ||
1617 | static boolean_t | |
1618 | sched_clutch_processor_queue_empty(processor_t processor); | |
1619 | ||
1620 | static uint64_t | |
1621 | sched_clutch_runq_stats_count_sum(processor_t processor); | |
1622 | ||
1623 | static int | |
1624 | sched_clutch_processor_bound_count(processor_t processor); | |
1625 | ||
1626 | static void | |
1627 | sched_clutch_pset_init(processor_set_t pset); | |
1628 | ||
1629 | static void | |
1630 | sched_clutch_processor_init(processor_t processor); | |
1631 | ||
1632 | static thread_t | |
1633 | sched_clutch_choose_thread(processor_t processor, int priority, ast_t reason); | |
1634 | ||
1635 | static void | |
1636 | sched_clutch_processor_queue_shutdown(processor_t processor); | |
1637 | ||
1638 | static sched_mode_t | |
1639 | sched_clutch_initial_thread_sched_mode(task_t parent_task); | |
1640 | ||
1641 | static uint32_t | |
1642 | sched_clutch_initial_quantum_size(thread_t thread); | |
1643 | ||
1644 | static bool | |
1645 | sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread); | |
1646 | ||
1647 | static uint32_t | |
1648 | sched_clutch_run_incr(thread_t thread); | |
1649 | ||
1650 | static uint32_t | |
1651 | sched_clutch_run_decr(thread_t thread); | |
1652 | ||
1653 | static void | |
1654 | sched_clutch_update_thread_bucket(thread_t thread); | |
1655 | ||
1656 | const struct sched_dispatch_table sched_clutch_dispatch = { | |
1657 | .sched_name = "clutch", | |
1658 | .init = sched_clutch_init, | |
1659 | .timebase_init = sched_clutch_timebase_init, | |
1660 | .processor_init = sched_clutch_processor_init, | |
1661 | .pset_init = sched_clutch_pset_init, | |
1662 | .maintenance_continuation = sched_timeshare_maintenance_continue, | |
1663 | .choose_thread = sched_clutch_choose_thread, | |
1664 | .steal_thread_enabled = sched_steal_thread_enabled, | |
1665 | .steal_thread = sched_clutch_steal_thread, | |
1666 | .compute_timeshare_priority = sched_compute_timeshare_priority, | |
1667 | .choose_processor = choose_processor, | |
1668 | .processor_enqueue = sched_clutch_processor_enqueue, | |
1669 | .processor_queue_shutdown = sched_clutch_processor_queue_shutdown, | |
1670 | .processor_queue_remove = sched_clutch_processor_queue_remove, | |
1671 | .processor_queue_empty = sched_clutch_processor_queue_empty, | |
1672 | .priority_is_urgent = priority_is_urgent, | |
1673 | .processor_csw_check = sched_clutch_processor_csw_check, | |
1674 | .processor_queue_has_priority = sched_clutch_processor_queue_has_priority, | |
1675 | .initial_quantum_size = sched_clutch_initial_quantum_size, | |
1676 | .initial_thread_sched_mode = sched_clutch_initial_thread_sched_mode, | |
1677 | .can_update_priority = can_update_priority, | |
1678 | .update_priority = update_priority, | |
1679 | .lightweight_update_priority = lightweight_update_priority, | |
1680 | .quantum_expire = sched_default_quantum_expire, | |
1681 | .processor_runq_count = sched_clutch_runq_count, | |
1682 | .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum, | |
1683 | .processor_bound_count = sched_clutch_processor_bound_count, | |
1684 | .thread_update_scan = sched_clutch_thread_update_scan, | |
1685 | .multiple_psets_enabled = TRUE, | |
1686 | .sched_groups_enabled = FALSE, | |
1687 | .avoid_processor_enabled = TRUE, | |
1688 | .thread_avoid_processor = sched_clutch_thread_avoid_processor, | |
1689 | .processor_balance = sched_SMT_balance, | |
1690 | ||
1691 | .rt_runq = sched_rtglobal_runq, | |
1692 | .rt_init = sched_rtglobal_init, | |
1693 | .rt_queue_shutdown = sched_rtglobal_queue_shutdown, | |
1694 | .rt_runq_scan = sched_rtglobal_runq_scan, | |
1695 | .rt_runq_count_sum = sched_rtglobal_runq_count_sum, | |
1696 | ||
1697 | .qos_max_parallelism = sched_qos_max_parallelism, | |
1698 | .check_spill = sched_check_spill, | |
1699 | .ipi_policy = sched_ipi_policy, | |
1700 | .thread_should_yield = sched_thread_should_yield, | |
1701 | .run_count_incr = sched_clutch_run_incr, | |
1702 | .run_count_decr = sched_clutch_run_decr, | |
1703 | .update_thread_bucket = sched_clutch_update_thread_bucket, | |
1704 | .pset_made_schedulable = sched_pset_made_schedulable, | |
1705 | }; | |
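/*
 * Note on how this table is consumed (hedged description of the surrounding
 * scheduler architecture rather than anything specific to this file): the core
 * scheduler invokes policy-specific behavior through the active dispatch table,
 * so when the clutch policy is selected a call such as
 * SCHED(choose_thread)(processor, pri, reason) is routed to
 * sched_clutch_choose_thread() via the function pointers registered above.
 */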
1706 | ||
1707 | __attribute__((always_inline)) | |
1708 | static inline run_queue_t | |
1709 | sched_clutch_bound_runq(processor_t processor) | |
1710 | { | |
1711 | return &processor->runq; | |
1712 | } | |
1713 | ||
1714 | __attribute__((always_inline)) | |
1715 | static inline sched_clutch_root_t | |
1716 | sched_clutch_processor_root_clutch(processor_t processor) | |
1717 | { | |
1718 | return &processor->processor_set->pset_clutch_root; | |
1719 | } | |
1720 | ||
1721 | __attribute__((always_inline)) | |
1722 | static inline run_queue_t | |
1723 | sched_clutch_thread_bound_runq(processor_t processor, __assert_only thread_t thread) | |
1724 | { | |
1725 | assert(thread->bound_processor == processor); | |
1726 | return sched_clutch_bound_runq(processor); | |
1727 | } | |
1728 | ||
1729 | static uint32_t | |
1730 | sched_clutch_initial_quantum_size(thread_t thread) | |
1731 | { | |
1732 | if (thread == THREAD_NULL) { | |
1733 | return std_quantum; | |
1734 | } | |
1735 | assert(sched_clutch_thread_quantum[thread->th_sched_bucket] <= UINT32_MAX); | |
1736 | return (uint32_t)sched_clutch_thread_quantum[thread->th_sched_bucket]; | |
1737 | } | |
1738 | ||
1739 | static sched_mode_t | |
1740 | sched_clutch_initial_thread_sched_mode(task_t parent_task) | |
1741 | { | |
1742 | if (parent_task == kernel_task) { | |
1743 | return TH_MODE_FIXED; | |
1744 | } else { | |
1745 | return TH_MODE_TIMESHARE; | |
1746 | } | |
1747 | } | |
1748 | ||
1749 | static void | |
1750 | sched_clutch_processor_init(processor_t processor) | |
1751 | { | |
1752 | run_queue_init(&processor->runq); | |
1753 | } | |
1754 | ||
1755 | static void | |
1756 | sched_clutch_pset_init(processor_set_t pset) | |
1757 | { | |
1758 | sched_clutch_root_init(&pset->pset_clutch_root, pset); | |
1759 | } | |
1760 | ||
1761 | static void | |
1762 | sched_clutch_init(void) | |
1763 | { | |
1764 | if (!PE_parse_boot_argn("sched_clutch_bucket_interactive_pri", &sched_clutch_bucket_interactive_pri, sizeof(sched_clutch_bucket_interactive_pri))) { | |
1765 | sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT; | |
1766 | } | |
1767 | if (!PE_parse_boot_argn("sched_clutch_bucket_pri_boost", &sched_clutch_bucket_pri_boost, sizeof(sched_clutch_bucket_pri_boost))) { | |
1768 | sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT; | |
1769 | } | |
1770 | sched_timeshare_init(); | |
1771 | } | |
1772 | ||
1773 | static void | |
1774 | sched_clutch_timebase_init(void) | |
1775 | { | |
1776 | sched_timeshare_timebase_init(); | |
1777 | sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel); | |
1778 | sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp); | |
1779 | sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum); | |
1780 | clock_interval_to_absolutetime_interval(SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS, | |
1781 | NSEC_PER_USEC, &sched_clutch_bucket_adjust_threshold); | |
1782 | ||
1783 | uint32_t interactivity_delta = 0; | |
1784 | if (!PE_parse_boot_argn("sched_clutch_bucket_interactivity_delta_usecs", &interactivity_delta, sizeof(interactivity_delta))) { | |
1785 | interactivity_delta = SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT; | |
1786 | } | |
1787 | clock_interval_to_absolutetime_interval(interactivity_delta, NSEC_PER_USEC, &sched_clutch_bucket_interactivity_delta); | |
1788 | } | |
1789 | ||
1790 | static thread_t | |
1791 | sched_clutch_choose_thread( | |
1792 | processor_t processor, | |
1793 | int priority, | |
1794 | __unused ast_t reason) | |
1795 | { | |
1796 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
1797 | uint32_t clutch_count = sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)); | |
1798 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
1799 | boolean_t choose_from_boundq = false; | |
1800 | ||
1801 | if (bound_runq->highq < priority && | |
1802 | clutch_pri < priority) { | |
1803 | return THREAD_NULL; | |
1804 | } | |
1805 | ||
1806 | if (bound_runq->count && clutch_count) { | |
1807 | if (bound_runq->highq >= clutch_pri) { | |
1808 | choose_from_boundq = true; | |
1809 | } | |
1810 | } else if (bound_runq->count) { | |
1811 | choose_from_boundq = true; | |
1812 | } else if (clutch_count) { | |
1813 | choose_from_boundq = false; | |
1814 | } else { | |
1815 | return THREAD_NULL; | |
1816 | } | |
1817 | ||
1818 | thread_t thread = THREAD_NULL; | |
1819 | if (choose_from_boundq == false) { | |
1820 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1821 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
1822 | } else { | |
1823 | thread = run_queue_dequeue(bound_runq, SCHED_HEADQ); | |
1824 | } | |
1825 | return thread; | |
1826 | } | |
1827 | ||
1828 | static boolean_t | |
1829 | sched_clutch_processor_enqueue( | |
1830 | processor_t processor, | |
1831 | thread_t thread, | |
1832 | sched_options_t options) | |
1833 | { | |
1834 | boolean_t result; | |
1835 | ||
1836 | thread->runq = processor; | |
1837 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1838 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1839 | result = sched_clutch_thread_insert(pset_clutch_root, thread, options); | |
1840 | } else { | |
1841 | run_queue_t rq = sched_clutch_thread_bound_runq(processor, thread); | |
1842 | result = run_queue_enqueue(rq, thread, options); | |
1843 | } | |
1844 | return result; | |
1845 | } | |
1846 | ||
1847 | static boolean_t | |
1848 | sched_clutch_processor_queue_empty(processor_t processor) | |
1849 | { | |
1850 | return sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0 && | |
1851 | sched_clutch_bound_runq(processor)->count == 0; | |
1852 | } | |
1853 | ||
1854 | static ast_t | |
1855 | sched_clutch_processor_csw_check(processor_t processor) | |
1856 | { | |
1857 | boolean_t has_higher; | |
1858 | int pri; | |
1859 | ||
1860 | if (sched_clutch_thread_avoid_processor(processor, current_thread())) { | |
1861 | return AST_PREEMPT | AST_URGENT; | |
1862 | } | |
1863 | ||
1864 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
1865 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
1866 | ||
1867 | assert(processor->active_thread != NULL); | |
1868 | ||
1869 | pri = MAX(clutch_pri, bound_runq->highq); | |
1870 | ||
1871 | if (processor->first_timeslice) { | |
1872 | has_higher = (pri > processor->current_pri); | |
1873 | } else { | |
1874 | has_higher = (pri >= processor->current_pri); | |
1875 | } | |
1876 | ||
1877 | if (has_higher) { | |
1878 | if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) { | |
1879 | return AST_PREEMPT | AST_URGENT; | |
1880 | } | |
1881 | ||
1882 | if (bound_runq->urgency > 0) { | |
1883 | return AST_PREEMPT | AST_URGENT; | |
1884 | } | |
1885 | ||
1886 | return AST_PREEMPT; | |
1887 | } | |
1888 | ||
1889 | return AST_NONE; | |
1890 | } | |
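/*
 * Illustrative preemption decision (hypothetical numbers): if the processor is in
 * its first timeslice running a thread at current_pri == 31 while the clutch root
 * reports priority 37 with a nonzero urgency count, pri > current_pri holds and
 * the urgency check above returns AST_PREEMPT | AST_URGENT; with both urgency
 * counts at zero the same comparison would return plain AST_PREEMPT.
 */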
1891 | ||
1892 | static boolean_t | |
1893 | sched_clutch_processor_queue_has_priority(processor_t processor, | |
1894 | int priority, | |
1895 | boolean_t gte) | |
1896 | { | |
1897 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
1898 | ||
1899 | int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq); | |
1900 | ||
1901 | if (gte) { | |
1902 | return qpri >= priority; | |
1903 | } else { | |
1904 | return qpri > priority; | |
1905 | } | |
1906 | } | |
1907 | ||
1908 | static int | |
1909 | sched_clutch_runq_count(processor_t processor) | |
1910 | { | |
1911 | return (int)sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) + sched_clutch_bound_runq(processor)->count; | |
1912 | } | |
1913 | ||
1914 | static uint64_t | |
1915 | sched_clutch_runq_stats_count_sum(processor_t processor) | |
1916 | { | |
1917 | uint64_t bound_sum = sched_clutch_bound_runq(processor)->runq_stats.count_sum; | |
1918 | ||
1919 | if (processor->cpu_id == processor->processor_set->cpu_set_low) { | |
1920 | return bound_sum + sched_clutch_root_count_sum(sched_clutch_processor_root_clutch(processor)); | |
1921 | } else { | |
1922 | return bound_sum; | |
1923 | } | |
1924 | } | |
1925 | static int | |
1926 | sched_clutch_processor_bound_count(processor_t processor) | |
1927 | { | |
1928 | return sched_clutch_bound_runq(processor)->count; | |
1929 | } | |
1930 | ||
1931 | static void | |
1932 | sched_clutch_processor_queue_shutdown(processor_t processor) | |
1933 | { | |
1934 | processor_set_t pset = processor->processor_set; | |
1935 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1936 | thread_t thread; | |
1937 | queue_head_t tqueue; | |
1938 | ||
1939 | /* We only need to migrate threads if this is the last active processor in the pset */ | |
1940 | if (pset->online_processor_count > 0) { | |
1941 | pset_unlock(pset); | |
1942 | return; | |
1943 | } | |
1944 | ||
1945 | queue_init(&tqueue); | |
1946 | while (sched_clutch_root_count(pset_clutch_root) > 0) { | |
1947 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
1948 | enqueue_tail(&tqueue, &thread->runq_links); | |
1949 | } | |
1950 | ||
1951 | pset_unlock(pset); | |
1952 | ||
1953 | qe_foreach_element_safe(thread, &tqueue, runq_links) { | |
1954 | remqueue(&thread->runq_links); | |
1955 | ||
1956 | thread_lock(thread); | |
1957 | ||
1958 | thread_setrun(thread, SCHED_TAILQ); | |
1959 | ||
1960 | thread_unlock(thread); | |
1961 | } | |
1962 | } | |
1963 | ||
1964 | static boolean_t | |
1965 | sched_clutch_processor_queue_remove( | |
1966 | processor_t processor, | |
1967 | thread_t thread) | |
1968 | { | |
1969 | run_queue_t rq; | |
1970 | processor_set_t pset = processor->processor_set; | |
1971 | ||
1972 | pset_lock(pset); | |
1973 | ||
1974 | if (processor == thread->runq) { | |
1975 | /* | |
1976 | * Thread is on a run queue and we have a lock on | |
1977 | * that run queue. | |
1978 | */ | |
1979 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) { | |
1980 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
1981 | sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time()); | |
1982 | } else { | |
1983 | rq = sched_clutch_thread_bound_runq(processor, thread); | |
1984 | run_queue_remove(rq, thread); | |
1985 | } | |
1986 | } else { | |
1987 | /* | |
1988 | * The thread left the run queue before we could | |
1989 | * lock the run queue. | |
1990 | */ | |
1991 | assert(thread->runq == PROCESSOR_NULL); | |
1992 | processor = PROCESSOR_NULL; | |
1993 | } | |
1994 | ||
1995 | pset_unlock(pset); | |
1996 | ||
1997 | return processor != PROCESSOR_NULL; | |
1998 | } | |
1999 | ||
2000 | static thread_t | |
2001 | sched_clutch_steal_thread(processor_set_t pset) | |
2002 | { | |
2003 | processor_set_t nset, cset = pset; | |
2004 | thread_t thread; | |
2005 | ||
2006 | do { | |
2007 | sched_clutch_root_t pset_clutch_root = &cset->pset_clutch_root; | |
2008 | if (sched_clutch_root_count(pset_clutch_root) > 0) { | |
2009 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
2010 | pset_unlock(cset); | |
2011 | return thread; | |
2012 | } | |
2013 | ||
2014 | nset = next_pset(cset); | |
2015 | ||
2016 | if (nset != pset) { | |
2017 | pset_unlock(cset); | |
2018 | ||
2019 | cset = nset; | |
2020 | pset_lock(cset); | |
2021 | } | |
2022 | } while (nset != pset); | |
2023 | ||
2024 | pset_unlock(cset); | |
2025 | ||
2026 | return THREAD_NULL; | |
2027 | } | |
2028 | ||
2029 | static void | |
2030 | sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context) | |
2031 | { | |
2032 | boolean_t restart_needed = FALSE; | |
2033 | processor_t processor = processor_list; | |
2034 | processor_set_t pset; | |
2035 | thread_t thread; | |
2036 | spl_t s; | |
2037 | ||
2038 | /* | |
2039 | * We update the threads associated with each processor (bound and idle threads) | |
2040 | * and then update the threads in each pset runqueue. | |
2041 | */ | |
2042 | ||
2043 | do { | |
2044 | do { | |
2045 | pset = processor->processor_set; | |
2046 | ||
2047 | s = splsched(); | |
2048 | pset_lock(pset); | |
2049 | ||
2050 | restart_needed = runq_scan(sched_clutch_bound_runq(processor), scan_context); | |
2051 | ||
2052 | pset_unlock(pset); | |
2053 | splx(s); | |
2054 | ||
2055 | if (restart_needed) { | |
2056 | break; | |
2057 | } | |
2058 | ||
2059 | thread = processor->idle_thread; | |
2060 | if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { | |
2061 | if (thread_update_add_thread(thread) == FALSE) { | |
2062 | restart_needed = TRUE; | |
2063 | break; | |
2064 | } | |
2065 | } | |
2066 | } while ((processor = processor->processor_list) != NULL); | |
2067 | ||
2068 | /* Ok, we now have a collection of candidates -- fix them. */ | |
2069 | thread_update_process_threads(); | |
2070 | } while (restart_needed); | |
2071 | ||
2072 | pset = &pset0; | |
2073 | ||
2074 | do { | |
2075 | do { | |
2076 | s = splsched(); | |
2077 | pset_lock(pset); | |
2078 | ||
2079 | if (sched_clutch_root_count(&pset->pset_clutch_root) > 0) { | |
2080 | queue_t clutch_bucket_list = &pset->pset_clutch_root.scr_clutch_buckets; | |
2081 | sched_clutch_bucket_t clutch_bucket; | |
2082 | qe_foreach_element(clutch_bucket, clutch_bucket_list, scb_listlink) { | |
2083 | sched_clutch_bucket_timeshare_update(clutch_bucket); | |
2084 | restart_needed = runq_scan(&clutch_bucket->scb_runq, scan_context); | |
2085 | if (restart_needed) { | |
2086 | break; | |
2087 | } | |
2088 | } | |
2089 | } | |
2090 | ||
2091 | pset_unlock(pset); | |
2092 | splx(s); | |
2093 | if (restart_needed) { | |
2094 | break; | |
2095 | } | |
2096 | } while ((pset = pset->pset_list) != NULL); | |
2097 | ||
2098 | /* Ok, we now have a collection of candidates -- fix them. */ | |
2099 | thread_update_process_threads(); | |
2100 | } while (restart_needed); | |
2101 | } | |
2102 | ||
2103 | extern int sched_allow_rt_smt; | |
2104 | ||
2105 | /* Return true if this thread should not continue running on this processor */ | |
2106 | static bool | |
2107 | sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread) | |
2108 | { | |
2109 | if (processor->processor_primary != processor) { | |
2110 | /* | |
2111 | * This is a secondary SMT processor. If the primary is running | |
2112 | * a realtime thread, only allow realtime threads on the secondary. | |
2113 | */ | |
2114 | if ((processor->processor_primary->current_pri >= BASEPRI_RTQUEUES) && ((thread->sched_pri < BASEPRI_RTQUEUES) || !sched_allow_rt_smt)) { | |
2115 | return true; | |
2116 | } | |
2117 | } | |
2118 | ||
2119 | return false; | |
2120 | } | |
2121 | ||
2122 | /* | |
2123 | * For the clutch scheduler, the run counts are maintained in the clutch | |
2124 | * buckets (i.e. the thread group scheduling structure). | |
2125 | */ | |
2126 | static uint32_t | |
2127 | sched_clutch_run_incr(thread_t thread) | |
2128 | { | |
2129 | assert((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN); | |
2130 | uint32_t new_count = os_atomic_inc(&sched_run_buckets[TH_BUCKET_RUN], relaxed); | |
2131 | sched_clutch_thread_run_bucket_incr(thread, thread->th_sched_bucket); | |
2132 | return new_count; | |
2133 | } | |
2134 | ||
2135 | static uint32_t | |
2136 | sched_clutch_run_decr(thread_t thread) | |
2137 | { | |
2138 | assert((thread->state & (TH_RUN | TH_IDLE)) != TH_RUN); | |
2139 | uint32_t new_count = os_atomic_dec(&sched_run_buckets[TH_BUCKET_RUN], relaxed); | |
2140 | sched_clutch_thread_run_bucket_decr(thread, thread->th_sched_bucket); | |
2141 | return new_count; | |
2142 | } | |
2143 | ||
2144 | static sched_bucket_t | |
2145 | sched_convert_pri_to_bucket(uint8_t priority) | |
2146 | { | |
2147 | sched_bucket_t bucket = TH_BUCKET_RUN; | |
2148 | ||
2149 | if (priority > BASEPRI_USER_INITIATED) { | |
2150 | bucket = TH_BUCKET_SHARE_FG; | |
2151 | } else if (priority > BASEPRI_DEFAULT) { | |
2152 | bucket = TH_BUCKET_SHARE_IN; | |
2153 | } else if (priority > BASEPRI_UTILITY) { | |
2154 | bucket = TH_BUCKET_SHARE_DF; | |
2155 | } else if (priority > MAXPRI_THROTTLE) { | |
2156 | bucket = TH_BUCKET_SHARE_UT; | |
2157 | } else { | |
2158 | bucket = TH_BUCKET_SHARE_BG; | |
2159 | } | |
2160 | return bucket; | |
2161 | } | |
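/*
 * Usage illustration (follows directly from the strict '>' comparisons above): a
 * bucketizing priority exactly equal to one of the boundary values falls into the
 * bucket below that boundary.
 *
 *   assert(sched_convert_pri_to_bucket(BASEPRI_DEFAULT) == TH_BUCKET_SHARE_DF);
 *   assert(sched_convert_pri_to_bucket(BASEPRI_UTILITY) == TH_BUCKET_SHARE_UT);
 */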
2162 | ||
2163 | /* | |
2164 | * For threads that have changed sched_pri without changing the | |
2165 | * base_pri for any reason other than decay, use the sched_pri | |
2166 | * as the bucketizing priority instead of base_pri. All such | |
2167 | * changes are typically due to boosts or demotions from kernel | |
2168 | * locking primitives. | |
2169 | */ | |
2170 | static boolean_t | |
2171 | sched_thread_sched_pri_promoted(thread_t thread) | |
2172 | { | |
2173 | return (thread->sched_flags & TH_SFLAG_PROMOTED) || | |
2174 | (thread->sched_flags & TH_SFLAG_PROMOTE_REASON_MASK) || | |
2175 | (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) || | |
2176 | (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || | |
2177 | (thread->kern_promotion_schedpri != 0); | |
2178 | } | |
2179 | ||
2180 | /* | |
2181 | * Routine to update the scheduling bucket for the thread. | |
2182 | * | |
2183 | * In the clutch scheduler implementation, the thread's bucket | |
2184 | * is based on sched_pri if it was promoted due to a kernel | |
2185 | * primitive; otherwise it's based on the thread's base_pri. This | |
2186 | * enhancement allows promoted threads to reach a higher priority | |
2187 | * bucket and potentially get selected sooner for scheduling. | |
2188 | * | |
2189 | * Also, the clutch scheduler does not honor fixed priority below | |
2190 | * FG priority. It simply puts those threads in the corresponding | |
2191 | * timeshare bucket. The reason for doing that is that it is | |
2192 | * extremely hard to define the scheduling properties of such threads | |
2193 | * and they typically lead to performance issues. | |
2194 | */ | |
2195 | ||
2196 | void | |
2197 | sched_clutch_update_thread_bucket(thread_t thread) | |
2198 | { | |
2199 | sched_bucket_t old_bucket = thread->th_sched_bucket; | |
2200 | sched_bucket_t new_bucket = TH_BUCKET_RUN; | |
2201 | assert(thread->runq == PROCESSOR_NULL); | |
2202 | ||
2203 | int pri = (sched_thread_sched_pri_promoted(thread)) ? thread->sched_pri : thread->base_pri; | |
2204 | ||
2205 | switch (thread->sched_mode) { | |
2206 | case TH_MODE_FIXED: | |
2207 | if (pri >= BASEPRI_FOREGROUND) { | |
2208 | new_bucket = TH_BUCKET_FIXPRI; | |
2209 | } else { | |
2210 | new_bucket = sched_convert_pri_to_bucket(pri); | |
2211 | } | |
2212 | break; | |
2213 | ||
2214 | case TH_MODE_REALTIME: | |
2215 | new_bucket = TH_BUCKET_FIXPRI; | |
2216 | break; | |
2217 | ||
2218 | case TH_MODE_TIMESHARE: | |
2219 | new_bucket = sched_convert_pri_to_bucket(pri); | |
2220 | break; | |
2221 | ||
2222 | default: | |
2223 | panic("unexpected mode: %d", thread->sched_mode); | |
2224 | break; | |
2225 | } | |
2226 | ||
2227 | if (old_bucket == new_bucket) { | |
2228 | return; | |
2229 | } | |
2230 | ||
2231 | thread->th_sched_bucket = new_bucket; | |
2232 | thread->pri_shift = sched_clutch_thread_pri_shift(thread, new_bucket); | |
2233 | ||
2234 | /* | |
2235 | * Since this is called after the thread has been removed from the runq, | |
2236 | * only the run counts need to be updated. The re-insert into the runq | |
2237 | * would put the thread into the correct new bucket's runq. | |
2238 | */ | |
2239 | if ((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN) { | |
2240 | sched_clutch_thread_run_bucket_decr(thread, old_bucket); | |
2241 | sched_clutch_thread_run_bucket_incr(thread, new_bucket); | |
2242 | } | |
2243 | } | |
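/*
 * Illustrative scenario (hypothetical priorities): a TH_MODE_TIMESHARE thread whose
 * base_pri sits in the utility range but which currently holds a kernel mutex and
 * has been promoted (TH_SFLAG_PROMOTED) to a foreground-level sched_pri is
 * bucketized using sched_pri here, e.g.
 *
 *   int pri = sched_thread_sched_pri_promoted(thread) ? thread->sched_pri
 *                                                     : thread->base_pri;
 *   // pri is the promoted sched_pri, so the thread lands in TH_BUCKET_SHARE_FG
 *
 * and it returns to its base_pri bucket the next time this routine runs after the
 * promotion is dropped.
 */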
2244 | ||
c6bf4f31 A |
2245 | #if __AMP__ |
2246 | ||
2247 | /* Implementation of the AMP version of the clutch scheduler */ | |
2248 | ||
2249 | static thread_t | |
2250 | sched_clutch_amp_steal_thread(processor_set_t pset); | |
2251 | ||
2252 | static ast_t | |
2253 | sched_clutch_amp_processor_csw_check(processor_t processor); | |
2254 | ||
2255 | static boolean_t | |
2256 | sched_clutch_amp_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte); | |
2257 | ||
2258 | static boolean_t | |
2259 | sched_clutch_amp_processor_queue_empty(processor_t processor); | |
2260 | ||
2261 | static thread_t | |
2262 | sched_clutch_amp_choose_thread(processor_t processor, int priority, ast_t reason); | |
2263 | ||
2264 | static void | |
2265 | sched_clutch_amp_processor_queue_shutdown(processor_t processor); | |
2266 | ||
2267 | static processor_t | |
2268 | sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread); | |
2269 | ||
2270 | static bool | |
2271 | sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread); | |
2272 | ||
2273 | static bool | |
2274 | sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread); | |
2275 | ||
2276 | static void | |
2277 | sched_clutch_migrate_foreign_buckets(processor_t processor, processor_set_t dst_pset, boolean_t drop_lock); | |
2278 | ||
2279 | static void | |
2280 | sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation); | |
2281 | ||
2282 | const struct sched_dispatch_table sched_clutch_amp_dispatch = { | |
2283 | .sched_name = "clutch_amp", | |
2284 | .init = sched_amp_init, | |
2285 | .timebase_init = sched_clutch_timebase_init, | |
2286 | .processor_init = sched_clutch_processor_init, | |
2287 | .pset_init = sched_clutch_pset_init, | |
2288 | .maintenance_continuation = sched_timeshare_maintenance_continue, | |
2289 | .choose_thread = sched_clutch_amp_choose_thread, | |
2290 | .steal_thread_enabled = sched_amp_steal_thread_enabled, | |
2291 | .steal_thread = sched_clutch_amp_steal_thread, | |
2292 | .compute_timeshare_priority = sched_compute_timeshare_priority, | |
2293 | .choose_processor = sched_clutch_amp_choose_processor, | |
2294 | .processor_enqueue = sched_clutch_processor_enqueue, | |
2295 | .processor_queue_shutdown = sched_clutch_amp_processor_queue_shutdown, | |
2296 | .processor_queue_remove = sched_clutch_processor_queue_remove, | |
2297 | .processor_queue_empty = sched_clutch_amp_processor_queue_empty, | |
2298 | .priority_is_urgent = priority_is_urgent, | |
2299 | .processor_csw_check = sched_clutch_amp_processor_csw_check, | |
2300 | .processor_queue_has_priority = sched_clutch_amp_processor_queue_has_priority, | |
2301 | .initial_quantum_size = sched_clutch_initial_quantum_size, | |
2302 | .initial_thread_sched_mode = sched_clutch_initial_thread_sched_mode, | |
2303 | .can_update_priority = can_update_priority, | |
2304 | .update_priority = update_priority, | |
2305 | .lightweight_update_priority = lightweight_update_priority, | |
2306 | .quantum_expire = sched_default_quantum_expire, | |
2307 | .processor_runq_count = sched_clutch_runq_count, | |
2308 | .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum, | |
2309 | .processor_bound_count = sched_clutch_processor_bound_count, | |
2310 | .thread_update_scan = sched_clutch_thread_update_scan, | |
2311 | .multiple_psets_enabled = TRUE, | |
2312 | .sched_groups_enabled = FALSE, | |
2313 | .avoid_processor_enabled = TRUE, | |
2314 | .thread_avoid_processor = sched_clutch_amp_thread_avoid_processor, | |
2315 | .processor_balance = sched_amp_balance, | |
2316 | ||
2317 | .rt_runq = sched_amp_rt_runq, | |
2318 | .rt_init = sched_amp_rt_init, | |
2319 | .rt_queue_shutdown = sched_amp_rt_queue_shutdown, | |
2320 | .rt_runq_scan = sched_amp_rt_runq_scan, | |
2321 | .rt_runq_count_sum = sched_amp_rt_runq_count_sum, | |
2322 | ||
2323 | .qos_max_parallelism = sched_amp_qos_max_parallelism, | |
2324 | .check_spill = sched_amp_check_spill, | |
2325 | .ipi_policy = sched_amp_ipi_policy, | |
2326 | .thread_should_yield = sched_clutch_amp_thread_should_yield, | |
2327 | .run_count_incr = sched_clutch_run_incr, | |
2328 | .run_count_decr = sched_clutch_run_decr, | |
2329 | .update_thread_bucket = sched_clutch_update_thread_bucket, | |
2330 | .pset_made_schedulable = sched_clutch_migrate_foreign_buckets, | |
2331 | .thread_group_recommendation_change = sched_clutch_amp_thread_group_recommendation_change, | |
2332 | }; | |
2333 | ||
2334 | extern processor_set_t ecore_set; | |
2335 | extern processor_set_t pcore_set; | |
2336 | ||
2337 | static thread_t | |
2338 | sched_clutch_amp_choose_thread( | |
2339 | processor_t processor, | |
2340 | int priority, | |
2341 | __unused ast_t reason) | |
2342 | { | |
2343 | processor_set_t pset = processor->processor_set; | |
2344 | bool spill_pending = false; | |
2345 | int spill_pri = -1; | |
2346 | ||
2347 | if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) { | |
2348 | spill_pending = true; | |
2349 | spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root); | |
2350 | } | |
2351 | ||
2352 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
2353 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2354 | boolean_t choose_from_boundq = false; | |
2355 | ||
2356 | if ((bound_runq->highq < priority) && | |
2357 | (clutch_pri < priority) && | |
2358 | (spill_pri < priority)) { | |
2359 | return THREAD_NULL; | |
2360 | } | |
2361 | ||
2362 | if ((spill_pri > bound_runq->highq) && | |
2363 | (spill_pri > clutch_pri)) { | |
2364 | /* | |
2365 | * There is a higher priority thread on the P-core runq, | |
2366 | * so returning THREAD_NULL here will cause thread_select() | |
2367 | * to call sched_clutch_amp_steal_thread() to try to get it. | |
2368 | */ | |
2369 | return THREAD_NULL; | |
2370 | } | |
2371 | ||
2372 | if (bound_runq->highq >= clutch_pri) { | |
2373 | choose_from_boundq = true; | |
2374 | } | |
2375 | ||
2376 | thread_t thread = THREAD_NULL; | |
2377 | if (choose_from_boundq == false) { | |
2378 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
2379 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
2380 | } else { | |
2381 | thread = run_queue_dequeue(bound_runq, SCHED_HEADQ); | |
2382 | } | |
2383 | return thread; | |
2384 | } | |
2385 | ||
2386 | static boolean_t | |
2387 | sched_clutch_amp_processor_queue_empty(processor_t processor) | |
2388 | { | |
2389 | processor_set_t pset = processor->processor_set; | |
2390 | bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id); | |
2391 | ||
2392 | return (sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0) && | |
2393 | (sched_clutch_bound_runq(processor)->count == 0) && | |
2394 | !spill_pending; | |
2395 | } | |
2396 | ||
2397 | static bool | |
2398 | sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread) | |
2399 | { | |
2400 | if (!sched_clutch_amp_processor_queue_empty(processor) || (rt_runq_count(processor->processor_set) > 0)) { | |
2401 | return true; | |
2402 | } | |
2403 | ||
2404 | if ((processor->processor_set->pset_cluster_type == PSET_AMP_E) && (recommended_pset_type(thread) == PSET_AMP_P)) { | |
2405 | return sched_clutch_root_count(&pcore_set->pset_clutch_root) > 0; | |
2406 | } | |
2407 | ||
2408 | return false; | |
2409 | } | |
2410 | ||
2411 | static ast_t | |
2412 | sched_clutch_amp_processor_csw_check(processor_t processor) | |
2413 | { | |
2414 | boolean_t has_higher; | |
2415 | int pri; | |
2416 | ||
2417 | int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)); | |
2418 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2419 | ||
2420 | assert(processor->active_thread != NULL); | |
2421 | ||
2422 | processor_set_t pset = processor->processor_set; | |
2423 | bool spill_pending = false; | |
2424 | int spill_pri = -1; | |
2425 | int spill_urgency = 0; | |
2426 | ||
2427 | if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) { | |
2428 | spill_pending = true; | |
2429 | spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root); | |
2430 | spill_urgency = sched_clutch_root_urgency(&pcore_set->pset_clutch_root); | |
2431 | } | |
2432 | ||
2433 | pri = MAX(clutch_pri, bound_runq->highq); | |
2434 | if (spill_pending) { | |
2435 | pri = MAX(pri, spill_pri); | |
2436 | } | |
2437 | ||
2438 | if (processor->first_timeslice) { | |
2439 | has_higher = (pri > processor->current_pri); | |
2440 | } else { | |
2441 | has_higher = (pri >= processor->current_pri); | |
2442 | } | |
2443 | ||
2444 | if (has_higher) { | |
2445 | if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) { | |
2446 | return AST_PREEMPT | AST_URGENT; | |
2447 | } | |
2448 | ||
2449 | if (bound_runq->urgency > 0) { | |
2450 | return AST_PREEMPT | AST_URGENT; | |
2451 | } | |
2452 | ||
2453 | if (spill_urgency > 0) { | |
2454 | return AST_PREEMPT | AST_URGENT; | |
2455 | } | |
2456 | ||
2457 | return AST_PREEMPT; | |
2458 | } | |
2459 | ||
2460 | return AST_NONE; | |
2461 | } | |
2462 | ||
2463 | static boolean_t | |
2464 | sched_clutch_amp_processor_queue_has_priority(processor_t processor, | |
2465 | int priority, | |
2466 | boolean_t gte) | |
2467 | { | |
2468 | bool spill_pending = false; | |
2469 | int spill_pri = -1; | |
2470 | processor_set_t pset = processor->processor_set; | |
2471 | ||
2472 | if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) { | |
2473 | spill_pending = true; | |
2474 | spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root); | |
2475 | } | |
2476 | run_queue_t bound_runq = sched_clutch_bound_runq(processor); | |
2477 | ||
2478 | int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq); | |
2479 | if (spill_pending) { | |
2480 | qpri = MAX(qpri, spill_pri); | |
2481 | } | |
2482 | ||
2483 | if (gte) { | |
2484 | return qpri >= priority; | |
2485 | } else { | |
2486 | return qpri > priority; | |
2487 | } | |
2488 | } | |
2489 | ||
2490 | /* | |
2491 | * sched_clutch_hierarchy_thread_pset() | |
2492 | * | |
2493 | * Routine to determine where a thread should be enqueued: based on its | |
2494 | * recommendation if this is the first runnable thread in the clutch_bucket, | |
2495 | * or on the clutch bucket's existing hierarchy membership otherwise. | |
2496 | */ | |
2497 | static processor_set_t | |
2498 | sched_clutch_hierarchy_thread_pset(thread_t thread) | |
2499 | { | |
2500 | if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread) == false) { | |
2501 | return (recommended_pset_type(thread) == PSET_AMP_P) ? pcore_set : ecore_set; | |
2502 | } | |
2503 | ||
2504 | sched_clutch_t clutch = sched_clutch_for_thread(thread); | |
2505 | sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]); | |
2506 | sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed); | |
2507 | if (scb_root) { | |
2508 | /* Clutch bucket is already runnable, return the pset hierarchy its part of */ | |
2509 | return scb_root->scr_pset; | |
2510 | } | |
2511 | return (recommended_pset_type(thread) == PSET_AMP_E) ? ecore_set : pcore_set; | |
2512 | } | |
2513 | ||
2514 | /* | |
2515 | * sched_clutch_thread_pset_recommended() | |
2516 | * | |
2517 | * Routine to determine if the thread should be placed on the provided pset. | |
2518 | * The routine first makes sure the cluster is available for scheduling. If | |
2519 | * it is available, it looks at the thread's recommendation. Called | |
2520 | * with the pset lock held. | |
2521 | */ | |
2522 | static bool | |
2523 | sched_clutch_thread_pset_recommended(thread_t thread, processor_set_t pset) | |
2524 | { | |
2525 | if (!sched_clutch_pset_available(pset)) { | |
2526 | return false; | |
2527 | } | |
2528 | ||
2529 | /* At this point, all clusters should be available and recommended */ | |
2530 | if (sched_clutch_hierarchy_thread_pset(thread) != pset) { | |
2531 | return false; | |
2532 | } | |
2533 | ||
2534 | return true; | |
2535 | } | |
2536 | ||
2537 | ||
2538 | static void | |
2539 | sched_clutch_amp_processor_queue_shutdown(processor_t processor) | |
2540 | { | |
2541 | processor_set_t pset = processor->processor_set; | |
2542 | sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor); | |
2543 | thread_t thread; | |
2544 | queue_head_t tqueue; | |
2545 | ||
2546 | /* We only need to migrate threads if this is the last active or last recommended processor in the pset */ | |
2547 | if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) { | |
2548 | pset_unlock(pset); | |
2549 | return; | |
2550 | } | |
2551 | ||
2552 | queue_init(&tqueue); | |
2553 | while (sched_clutch_root_count(pset_clutch_root) > 0) { | |
2554 | thread = sched_clutch_thread_highest(pset_clutch_root); | |
2555 | enqueue_tail(&tqueue, &thread->runq_links); | |
2556 | } | |
2557 | pset_unlock(pset); | |
2558 | ||
2559 | qe_foreach_element_safe(thread, &tqueue, runq_links) { | |
2560 | remqueue(&thread->runq_links); | |
2561 | thread_lock(thread); | |
2562 | thread_setrun(thread, SCHED_TAILQ); | |
2563 | thread_unlock(thread); | |
2564 | } | |
2565 | } | |
2566 | ||
2567 | static thread_t | |
2568 | sched_clutch_amp_steal_thread(processor_set_t pset) | |
2569 | { | |
2570 | thread_t thread = THREAD_NULL; | |
2571 | processor_set_t nset = pset; | |
2572 | ||
2573 | if (pcore_set->online_processor_count == 0) { | |
2574 | /* Nothing to steal from */ | |
2575 | goto out; | |
2576 | } | |
2577 | ||
2578 | if (pset->pset_cluster_type == PSET_AMP_P) { | |
2579 | /* P cores don't steal from E cores */ | |
2580 | goto out; | |
2581 | } | |
2582 | ||
2583 | processor_t processor = current_processor(); | |
2584 | assert(pset == processor->processor_set); | |
2585 | ||
2586 | bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id); | |
2587 | bit_clear(pset->pending_spill_cpu_mask, processor->cpu_id); | |
2588 | ||
2589 | nset = pcore_set; | |
2590 | ||
2591 | assert(nset != pset); | |
2592 | ||
2593 | if (sched_get_pset_load_average(nset) >= sched_amp_steal_threshold(nset, spill_pending)) { | |
2594 | pset_unlock(pset); | |
2595 | ||
2596 | pset = nset; | |
2597 | ||
2598 | pset_lock(pset); | |
2599 | ||
2600 | /* Allow steal if load average still OK, no idle cores, and more threads on runq than active cores DISPATCHING */ | |
2601 | if ((sched_get_pset_load_average(pset) >= sched_amp_steal_threshold(pset, spill_pending)) && | |
2602 | ((int)sched_clutch_root_count(&pset->pset_clutch_root) > bit_count(pset->cpu_state_map[PROCESSOR_DISPATCHING])) && | |
2603 | (bit_count(pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) == 0)) { | |
2604 | thread = sched_clutch_thread_highest(&pset->pset_clutch_root); | |
2605 | KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_STEAL) | DBG_FUNC_NONE, spill_pending, 0, 0, 0); | |
2606 | sched_update_pset_load_average(pset); | |
2607 | } | |
2608 | } | |
2609 | ||
2610 | out: | |
2611 | pset_unlock(pset); | |
2612 | return thread; | |
2613 | } | |
2614 | ||
2615 | /* Return true if this thread should not continue running on this processor */ | |
2616 | static bool | |
2617 | sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread) | |
2618 | { | |
2619 | if (processor->processor_set->pset_cluster_type == PSET_AMP_E) { | |
2620 | if (sched_clutch_thread_pset_recommended(thread, pcore_set)) { | |
2621 | return true; | |
2622 | } | |
2623 | } else if (processor->processor_set->pset_cluster_type == PSET_AMP_P) { | |
2624 | if (!sched_clutch_thread_pset_recommended(thread, pcore_set)) { | |
2625 | return true; | |
2626 | } | |
2627 | } | |
2628 | ||
2629 | return false; | |
2630 | } | |
2631 | ||
2632 | static processor_t | |
2633 | sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread) | |
2634 | { | |
2635 | /* Bound threads don't call this function */ | |
2636 | assert(thread->bound_processor == PROCESSOR_NULL); | |
2637 | ||
2638 | processor_set_t nset; | |
2639 | processor_t chosen_processor = PROCESSOR_NULL; | |
2640 | ||
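| /* | |
|  * Ping-pong between the two psets until one is found that is both available | |
|  * and recommended for the thread; if neither is available, fall back to the | |
|  * master processor's pset below. | |
|  */ | |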
2641 | select_pset: | |
2642 | nset = (pset == ecore_set) ? pcore_set : ecore_set; | |
2643 | if (!sched_clutch_pset_available(pset)) { | |
2644 | /* If the current pset is not available for scheduling, just use the other pset */ | |
2645 | pset_unlock(pset); | |
2646 | pset_lock(nset); | |
2647 | goto select_processor; | |
2648 | } | |
2649 | ||
2650 | /* Check if the thread is recommended to run on this pset */ | |
2651 | if (sched_clutch_thread_pset_recommended(thread, pset)) { | |
2652 | nset = pset; | |
2653 | goto select_processor; | |
2654 | } else { | |
2655 | /* pset not recommended; try the other pset */ | |
2656 | pset_unlock(pset); | |
2657 | pset_lock(nset); | |
2658 | pset = nset; | |
2659 | goto select_pset; | |
2660 | } | |
2661 | ||
2662 | select_processor: | |
2663 | if (!sched_clutch_pset_available(nset)) { | |
2664 | /* | |
2665 |  * It looks like neither pset is available for scheduling | |
2666 |  * at the moment. In that case, just fall back to the | |
2667 |  * master processor's pset for scheduling. | |
2668 |  */ | |
2669 | if (master_processor->processor_set != nset) { | |
2670 | pset_unlock(nset); | |
2671 | nset = master_processor->processor_set; | |
2672 | pset_lock(nset); | |
2673 | } | |
2674 | } | |
2675 | chosen_processor = choose_processor(nset, processor, thread); | |
2676 | assert(chosen_processor->processor_set == nset); | |
2677 | return chosen_processor; | |
2678 | } | |
2679 | ||
2680 | /* | |
2681 | * AMP Clutch Scheduler Thread Migration | |
2682 | * | |
2683 |  * For the AMP version of the clutch scheduler, a thread is always scheduled via its | |
2684 |  * thread group. So it is important to make sure that the thread group is part of the | |
2685 |  * correct processor set hierarchy. In order to do that, the clutch scheduler moves | |
2686 |  * all eligible clutch buckets to the correct hierarchy when the recommendation of a | |
2687 | * thread group is changed by CLPC. | |
2688 | */ | |
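| /* | |
|  * Illustrative flow when CLPC changes a thread group recommendation (sketch): | |
|  * | |
|  *   sched_clutch_amp_thread_group_recommendation_change() | |
|  *     -> sched_clutch_migrate_thread_group()            (re-homes runnable threads) | |
|  *     -> sched_amp_bounce_thread_group_from_ecores()    (only when the new | |
|  *        recommendation is CLUSTER_TYPE_P) | |
|  */ | |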
2689 | ||
2690 | /* | |
2691 | * sched_clutch_recommended_pset() | |
2692 | * | |
2693 | * Routine to decide which hierarchy the thread group should be in based on the | |
2694 | * recommendation and other thread group and system properties. This routine is | |
2695 | * used to determine if thread group migration is necessary and should mimic the | |
2696 | * logic in sched_clutch_thread_pset_recommended() & recommended_pset_type(). | |
2697 | */ | |
2698 | static processor_set_t | |
2699 | sched_clutch_recommended_pset(sched_clutch_t sched_clutch, cluster_type_t recommendation) | |
2700 | { | |
2701 | if (!sched_clutch_pset_available(pcore_set)) { | |
2702 | return ecore_set; | |
2703 | } | |
2704 | ||
2705 | if (!sched_clutch_pset_available(ecore_set)) { | |
2706 | return pcore_set; | |
2707 | } | |
2708 | ||
2709 | /* | |
2710 | * If all clusters are available and recommended, use the recommendation | |
2711 | * to decide which cluster to use. | |
2712 | */ | |
2713 | pset_cluster_type_t type = thread_group_pset_recommendation(sched_clutch->sc_tg, recommendation); | |
2714 | return (type == PSET_AMP_E) ? ecore_set : pcore_set; | |
2715 | } | |
2716 | ||
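| /* | |
|  * sched_clutch_bucket_threads_drain() | |
|  * | |
|  * Remove every runnable thread in the clutch bucket from the root hierarchy and | |
|  * collect the threads on the caller-supplied local queue. Draining the last | |
|  * thread also pulls the clutch bucket itself out of the hierarchy (see the | |
|  * asserts at the end of the routine). | |
|  */ | |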
2717 | static void | |
2718 | sched_clutch_bucket_threads_drain(sched_clutch_bucket_t clutch_bucket, sched_clutch_root_t root_clutch, queue_t clutch_threads) | |
2719 | { | |
2720 | uint16_t thread_count = clutch_bucket->scb_thr_count; | |
2721 | thread_t thread; | |
2722 | uint64_t current_timestamp = mach_approximate_time(); | |
2723 | while (thread_count > 0) { | |
2724 | thread = run_queue_peek(&clutch_bucket->scb_runq); | |
2725 | sched_clutch_thread_remove(root_clutch, thread, current_timestamp); | |
2726 | enqueue_tail(clutch_threads, &thread->runq_links); | |
2727 | thread_count--; | |
2728 | } | |
2729 | ||
2730 | /* | |
2731 | * This operation should have drained the clutch bucket and pulled it out of the | |
2732 | * hierarchy. | |
2733 | */ | |
2734 | assert(clutch_bucket->scb_thr_count == 0); | |
2735 | assert(clutch_bucket->scb_root == NULL); | |
2736 | } | |
2737 | ||
2738 | /* | |
2739 | * sched_clutch_migrate_thread_group() | |
2740 | * | |
2741 | * Routine to implement the migration of threads when the thread group | |
2742 | * recommendation is updated. The migration works using a 2-phase | |
2743 | * algorithm. | |
2744 | * | |
2745 |  * Phase 1: With the source pset (the complement of the destination chosen by | |
2746 |  * sched_clutch_recommended_pset) locked, drain all the runnable threads into a | |
2747 |  * local queue and update the TG recommendation. | |
2748 | * | |
2749 | * Phase 2: Call thread_setrun() on all the drained threads. Since the TG recommendation | |
2750 | * has been updated, these should all end up in the right hierarchy. | |
2751 | */ | |
2752 | static void | |
2753 | sched_clutch_migrate_thread_group(sched_clutch_t sched_clutch, cluster_type_t new_recommendation) | |
2754 | { | |
2755 | thread_t thread; | |
2756 | ||
2757 | /* If the thread group is empty, just update the recommendation */ | |
2758 | if (os_atomic_load(&sched_clutch->sc_thr_count, relaxed) == 0) { | |
2759 | thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation); | |
2760 | return; | |
2761 | } | |
2762 | ||
2763 | processor_set_t dst_pset = sched_clutch_recommended_pset(sched_clutch, new_recommendation); | |
2764 | processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set; | |
2765 | ||
2766 | queue_head_t clutch_threads; | |
2767 | queue_init(&clutch_threads); | |
2768 | ||
2769 | /* Interrupts need to be disabled to make sure threads won't become runnable during the | |
2770 | * migration and attempt to grab the pset/thread locks. | |
2771 | */ | |
2772 | spl_t s = splsched(); | |
2773 | ||
2774 | pset_lock(src_pset); | |
2775 | for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SCHED_MAX; bucket++) { | |
2776 | sched_clutch_bucket_t clutch_bucket = &(sched_clutch->sc_clutch_buckets[bucket]); | |
2777 | sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed); | |
2778 | if ((scb_root == NULL) || (scb_root->scr_pset == dst_pset)) { | |
2779 | /* Clutch bucket not runnable or already in the right hierarchy; nothing to do here */ | |
2780 | continue; | |
2781 | } | |
2782 | assert(scb_root->scr_pset == src_pset); | |
2783 | /* Now remove all the threads from the runq so that thread->runq is set correctly */ | |
2784 | sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads); | |
2785 | } | |
2786 | ||
2787 | /* | |
2788 | * Now that all the clutch buckets have been drained, update the TG recommendation. | |
2789 |  * This operation needs to be done with the pset lock held so that anyone who | |
2790 |  * started before the migration sees the original pset as the root of this | |
2791 |  * sched_clutch and blocks on the src_pset lock. Once the TG changes, threads | |
2792 |  * that become runnable find the clutch bucket empty, and the updated TG | |
2793 |  * recommendation steers them to enqueue it in the newly recommended | |
2794 |  * hierarchy. This effectively synchronizes with other threads calling | |
2795 | * thread_setrun() and trying to decide which pset the thread/clutch_bucket | |
2796 | * belongs in. | |
2797 | */ | |
2798 | thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation); | |
2799 | pset_unlock(src_pset); | |
2800 | ||
2801 | /* Now setrun all the threads in the local queue */ | |
2802 | qe_foreach_element_safe(thread, &clutch_threads, runq_links) { | |
2803 | remqueue(&thread->runq_links); | |
2804 | thread_lock(thread); | |
2805 | thread_setrun(thread, SCHED_TAILQ); | |
2806 | thread_unlock(thread); | |
2807 | } | |
2808 | ||
2809 | splx(s); | |
2810 | } | |
2811 | ||
2812 | static void | |
2813 | sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation) | |
2814 | { | |
2815 | /* | |
2816 | * For the clutch scheduler, the change in recommendation moves the thread group | |
2817 | * to the right hierarchy. sched_clutch_migrate_thread_group() is also responsible | |
2818 | * for updating the recommendation of the thread group. | |
2819 | */ | |
2820 | sched_clutch_migrate_thread_group(&tg->tg_sched_clutch, new_recommendation); | |
2821 | ||
2822 | if (new_recommendation != CLUSTER_TYPE_P) { | |
2823 | return; | |
2824 | } | |
2825 | ||
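| /* | |
|  * The group is now recommended for the P-cluster; ask the E-core pset to kick | |
|  * any of its threads that are still running there so they can migrate. | |
|  */ | |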
2826 | sched_amp_bounce_thread_group_from_ecores(ecore_set, tg); | |
2827 | } | |
2828 | ||
2829 | /* | |
2830 | * sched_clutch_migrate_foreign_buckets() | |
2831 | * | |
2832 |  * Routine to migrate all the clutch buckets that are not in their recommended | |
2833 |  * pset hierarchy now that a new pset has become runnable. The algorithm is | |
2834 |  * similar to sched_clutch_migrate_thread_group(). | |
2835 | * | |
2836 | * Invoked with the newly recommended pset lock held and interrupts disabled. | |
2837 | */ | |
2838 | static void | |
2839 | sched_clutch_migrate_foreign_buckets(__unused processor_t processor, processor_set_t dst_pset, boolean_t drop_lock) | |
2840 | { | |
2841 | thread_t thread; | |
2842 | processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set; | |
2843 | ||
2844 | if (!sched_clutch_pset_available(dst_pset)) { | |
2845 | /* | |
2846 |  * It is possible that some of the pset's state changed, but | |
2847 |  * it is still not available for scheduling. Nothing to | |
2848 |  * do here in that case. | |
2849 | */ | |
2850 | if (drop_lock) { | |
2851 | pset_unlock(dst_pset); | |
2852 | } | |
2853 | return; | |
2854 | } | |
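| /* | |
|  * Drop the dst_pset lock while the source hierarchy is drained; it is | |
|  * re-acquired at migration_complete if the caller expects it to remain held. | |
|  */ | |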
2855 | pset_unlock(dst_pset); | |
2856 | ||
2857 | queue_head_t clutch_threads; | |
2858 | queue_init(&clutch_threads); | |
2859 | sched_clutch_root_t src_root = &src_pset->pset_clutch_root; | |
2860 | ||
2861 | pset_lock(src_pset); | |
2862 | queue_t clutch_bucket_list = &src_pset->pset_clutch_root.scr_foreign_buckets; | |
2863 | ||
2864 | if (sched_clutch_root_count(src_root) == 0) { | |
2865 | /* No threads present in this hierarchy */ | |
2866 | pset_unlock(src_pset); | |
2867 | goto migration_complete; | |
2868 | } | |
2869 | ||
2870 | sched_clutch_bucket_t clutch_bucket; | |
2871 | qe_foreach_element_safe(clutch_bucket, clutch_bucket_list, scb_foreignlink) { | |
2872 | sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed); | |
2873 | assert(scb_root->scr_pset == src_pset); | |
2874 | /* Now remove all the threads from the runq so that thread->runq is set correctly */ | |
2875 | sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads); | |
2876 | assert(clutch_bucket->scb_foreign == false); | |
2877 | } | |
2878 | pset_unlock(src_pset); | |
2879 | ||
2880 | /* Now setrun all the threads in the local queue */ | |
2881 | qe_foreach_element_safe(thread, &clutch_threads, runq_links) { | |
2882 | remqueue(&thread->runq_links); | |
2883 | thread_lock(thread); | |
2884 | thread_setrun(thread, SCHED_TAILQ); | |
2885 | thread_unlock(thread); | |
2886 | } | |
2887 | ||
2888 | migration_complete: | |
2889 | if (!drop_lock) { | |
2890 | pset_lock(dst_pset); | |
2891 | } | |
2892 | } | |
2893 | ||
2894 | #endif /* __AMP__ */ | |
cb323159 A |
2895 | ||
2896 | #endif /* CONFIG_SCHED_CLUTCH */ |