/* apple/xnu (xnu-7195.60.75) - osfmk/kern/sched_amp_common.c */

/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <sys/kdebug.h>
#include <kern/sched_amp_common.h>
#include <stdatomic.h>

#if __AMP__

/* Exported globals */
processor_set_t ecore_set = NULL;
processor_set_t pcore_set = NULL;

static struct processor_set pset1;
static struct pset_node pset_node1;

#if DEVELOPMENT || DEBUG
bool system_ecore_only = false;
#endif /* DEVELOPMENT || DEBUG */

/*
 * sched_amp_init()
 *
 * Initialize the pcore_set and ecore_set globals which describe the
 * P/E processor sets.
 */
void
sched_amp_init(void)
{
    pset_init(&pset1, &pset_node1);
    pset_node1.psets = &pset1;
    pset_node0.node_list = &pset_node1;

    if (ml_get_boot_cluster() == CLUSTER_TYPE_P) {
        pcore_set = &pset0;
        ecore_set = &pset1;
    } else {
        ecore_set = &pset0;
        pcore_set = &pset1;
    }

    ecore_set->pset_cluster_type = PSET_AMP_E;
    ecore_set->pset_cluster_id = 0;

    pcore_set->pset_cluster_type = PSET_AMP_P;
    pcore_set->pset_cluster_id = 1;

#if DEVELOPMENT || DEBUG
    if (PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
        system_ecore_only = true;
    }
#endif /* DEVELOPMENT || DEBUG */

    sched_timeshare_init();
}

/* Spill threshold load average is ncpus in pset + (sched_amp_spill_count/(1 << PSET_LOAD_FRACTIONAL_SHIFT)) */
int sched_amp_spill_count = 3;
int sched_amp_idle_steal = 1;
int sched_amp_spill_steal = 1;
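
/*
 * Illustrative example (editor's note, not in the original source): the pset
 * load average and this threshold share the same fixed-point format, shifted
 * left by PSET_LOAD_FRACTIONAL_SHIFT. Assuming a shift of 4 (16 units per CPU
 * of load) and a 4-CPU P-cluster with all CPUs recommended,
 * sched_amp_spill_threshold() below would return (4 << 4) + 3 = 67, i.e.
 * spill is considered once the cluster's load average exceeds roughly 4.2
 * CPUs' worth of runnable work.
 */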

/*
 * We see performance gains from sending immediate IPIs to P-cores to run
 * P-eligible threads, and fewer P-to-E migrations when deferred IPIs are
 * used for spill.
 */
int sched_amp_spill_deferred_ipi = 1;
int sched_amp_pcores_preempt_immediate_ipi = 1;

/*
 * sched_perfcontrol_inherit_recommendation_from_tg changes the AMP
 * scheduling policy away from the default and allows the policy to be
 * modified at run-time.
 *
 * Once modified from the default, the policy toggles between "follow
 * thread group" and "restrict to E".
 */

_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util = SCHED_PERFCTL_POLICY_DEFAULT;
_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg = SCHED_PERFCTL_POLICY_DEFAULT;

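/*
 * Illustrative sketch (editor's note, not in the original source): consumers
 * of these knobs read them with a relaxed atomic load and fall back to the
 * default E-only behavior, along the lines of:
 *
 *     if (os_atomic_load(&sched_perfctl_policy_util, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) {
 *         // default policy: utility work stays on E-cores only
 *     } else {
 *         // policy was adjusted at run-time; scheduling may span both clusters
 *     }
 *
 * sched_amp_qos_max_parallelism() below uses exactly this pattern to widen
 * the reported parallelism for utility/bg once the policy is overridden.
 */
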
/*
 * sched_amp_spill_threshold()
 *
 * Routine to calculate the spill threshold, which decides if the cluster should spill.
 */
int
sched_amp_spill_threshold(processor_set_t pset)
{
    int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

    return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + sched_amp_spill_count;
}

/*
 * pset_signal_spill()
 *
 * Routine to signal a running/idle CPU to cause a spill onto that CPU.
 * Called with pset locked, returns unlocked
 */
void
pset_signal_spill(processor_set_t pset, int spilled_thread_priority)
{
    processor_t processor;
    sched_ipi_type_t ipi_type = SCHED_IPI_NONE;

    uint64_t idle_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE];
    for (int cpuid = lsb_first(idle_map); cpuid >= 0; cpuid = lsb_next(idle_map, cpuid)) {
        processor = processor_array[cpuid];
        if (bit_set_if_clear(pset->pending_spill_cpu_mask, processor->cpu_id)) {
            KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 0, 0, 0);

            processor->deadline = UINT64_MAX;
            pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);

            if (processor == current_processor()) {
                bit_set(pset->pending_AST_URGENT_cpu_mask, processor->cpu_id);
            } else {
                ipi_type = sched_ipi_action(processor, NULL, true, SCHED_IPI_EVENT_SPILL);
            }
            pset_unlock(pset);
            sched_ipi_perform(processor, ipi_type);
            return;
        }
    }

    processor_t ast_processor = NULL;
    uint64_t running_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        processor = processor_array[cpuid];
        if (processor->current_recommended_pset_type == PSET_AMP_P) {
            /* Already running a spilled P-core recommended thread */
            continue;
        }
        if (bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
            /* Already received a spill signal */
            continue;
        }
        if (processor->current_pri >= spilled_thread_priority) {
            /* Already running a higher or equal priority thread */
            continue;
        }

        /* Found a suitable processor */
        bit_set(pset->pending_spill_cpu_mask, processor->cpu_id);
        KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 1, 0, 0);
        if (processor == current_processor()) {
            ast_on(AST_PREEMPT);
        }
        ipi_type = sched_ipi_action(processor, NULL, false, SCHED_IPI_EVENT_SPILL);
        if (ipi_type != SCHED_IPI_NONE) {
            ast_processor = processor;
        }
        break;
    }

    pset_unlock(pset);
    sched_ipi_perform(ast_processor, ipi_type);
}

/*
 * pset_should_accept_spilled_thread()
 *
 * Routine to decide if pset should accept spilled threads.
 * This function must be safe to call (to use as a hint) without holding the pset lock.
 */
bool
pset_should_accept_spilled_thread(processor_set_t pset, int spilled_thread_priority)
{
    if ((pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
        return true;
    }

    uint64_t cpu_map = (pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING]);

    for (int cpuid = lsb_first(cpu_map); cpuid >= 0; cpuid = lsb_next(cpu_map, cpuid)) {
        processor_t processor = processor_array[cpuid];

        if (processor->current_recommended_pset_type == PSET_AMP_P) {
            /* This processor is already running a spilled thread */
            continue;
        }

        if (processor->current_pri < spilled_thread_priority) {
            return true;
        }
    }

    return false;
}

/*
 * should_spill_to_ecores()
 *
 * Spill policy is implemented here
 */
bool
should_spill_to_ecores(processor_set_t nset, thread_t thread)
{
    if (nset->pset_cluster_type == PSET_AMP_E) {
        /* Not relevant if ecores already preferred */
        return false;
    }

    if (!pset_is_recommended(ecore_set)) {
        /* E cores must be recommended */
        return false;
    }

    if (thread->sched_flags & TH_SFLAG_PCORE_ONLY) {
        return false;
    }

    if (thread->sched_pri >= BASEPRI_RTQUEUES) {
        /* Never spill realtime threads */
        return false;
    }

    if ((nset->recommended_bitmask & nset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
        /* Don't spill if the P-cluster still has idle cores */
        return false;
    }

    if ((sched_get_pset_load_average(nset, 0) >= sched_amp_spill_threshold(nset)) && /* There is already a load on P cores */
        pset_should_accept_spilled_thread(ecore_set, thread->sched_pri)) {           /* and the E cores can accept the spilled thread */
        return true;
    }

    return false;
}

/*
 * sched_amp_check_spill()
 *
 * Routine to check if the thread should be spilled and signal the pset if needed.
 */
void
sched_amp_check_spill(processor_set_t pset, thread_t thread)
{
    /* pset is unlocked */

    /* Bound threads don't call this function */
    assert(thread->bound_processor == PROCESSOR_NULL);

    if (should_spill_to_ecores(pset, thread)) {
        pset_lock(ecore_set);

        pset_signal_spill(ecore_set, thread->sched_pri);
        /* returns with ecore_set unlocked */
    }
}

/*
 * sched_amp_steal_threshold()
 *
 * Routine to calculate the steal threshold
 */
int
sched_amp_steal_threshold(processor_set_t pset, bool spill_pending)
{
    int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

    return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + (spill_pending ? sched_amp_spill_steal : sched_amp_idle_steal);
}

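/*
 * Illustrative example (editor's note, not in the original source): using the
 * same fixed-point convention as the spill threshold, a fully recommended
 * 4-CPU E-cluster yields a steal threshold of
 * (4 << PSET_LOAD_FRACTIONAL_SHIFT) + 1, i.e. one fractional unit of load
 * above one runnable thread per CPU, whether or not a spill is pending
 * (sched_amp_spill_steal and sched_amp_idle_steal both default to 1).
 */
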
/*
 * sched_amp_steal_thread_enabled()
 *
 * Thread stealing is enabled only for the E-cluster, and only while the
 * P-cluster has online processors.
 */
bool
sched_amp_steal_thread_enabled(processor_set_t pset)
{
    return (pset->pset_cluster_type == PSET_AMP_E) && (pcore_set->online_processor_count > 0);
}

/*
 * sched_amp_balance()
 *
 * Invoked with pset locked, returns with pset unlocked
 */
void
sched_amp_balance(processor_t cprocessor, processor_set_t cpset)
{
    assert(cprocessor == current_processor());

    pset_unlock(cpset);

    if (cpset->pset_cluster_type == PSET_AMP_E || !cprocessor->is_recommended) {
        return;
    }

    /*
     * cprocessor is an idle, recommended P core processor.
     * Look for P-eligible threads that have spilled to an E core
     * and coax them to come back.
     */

    processor_set_t pset = ecore_set;

    pset_lock(pset);

    processor_t eprocessor;
    uint64_t ast_processor_map = 0;

    sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};
    uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        eprocessor = processor_array[cpuid];
        if ((eprocessor->current_pri < BASEPRI_RTQUEUES) &&
            (eprocessor->current_recommended_pset_type == PSET_AMP_P)) {
            ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
            if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
                bit_set(ast_processor_map, eprocessor->cpu_id);
                assert(eprocessor != cprocessor);
            }
        }
    }

    pset_unlock(pset);

    for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
        processor_t ast_processor = processor_array[cpuid];
        sched_ipi_perform(ast_processor, ipi_type[cpuid]);
    }
}

/*
 * Helper function for sched_amp_thread_group_recommendation_change()
 * Find all the cores in the pset running threads from the thread_group tg
 * and send them a rebalance interrupt.
 */
void
sched_amp_bounce_thread_group_from_ecores(processor_set_t pset, struct thread_group *tg)
{
    assert(pset->pset_cluster_type == PSET_AMP_E);
    uint64_t ast_processor_map = 0;
    sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};

    spl_t s = splsched();
    pset_lock(pset);

    uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        processor_t eprocessor = processor_array[cpuid];
        if (eprocessor->current_thread_group == tg) {
            ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
            if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
                bit_set(ast_processor_map, eprocessor->cpu_id);
            } else if (eprocessor == current_processor()) {
                ast_on(AST_PREEMPT);
                bit_set(pset->pending_AST_PREEMPT_cpu_mask, eprocessor->cpu_id);
            }
        }
    }

    KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_RECOMMENDATION_CHANGE) | DBG_FUNC_NONE, tg, ast_processor_map, 0, 0);

    pset_unlock(pset);

    for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
        processor_t ast_processor = processor_array[cpuid];
        sched_ipi_perform(ast_processor, ipi_type[cpuid]);
    }

    splx(s);
}

/*
 * sched_amp_ipi_policy()
 */
sched_ipi_type_t
sched_amp_ipi_policy(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event)
{
    processor_set_t pset = dst->processor_set;
    assert(bit_test(pset->pending_AST_URGENT_cpu_mask, dst->cpu_id) == false);
    assert(dst != current_processor());

    boolean_t deferred_ipi_supported = false;
#if defined(CONFIG_SCHED_DEFERRED_AST)
    deferred_ipi_supported = true;
#endif /* CONFIG_SCHED_DEFERRED_AST */

    switch (event) {
    case SCHED_IPI_EVENT_SPILL:
        /* For the spill event, use deferred IPIs if sched_amp_spill_deferred_ipi is set */
        if (deferred_ipi_supported && sched_amp_spill_deferred_ipi) {
            return sched_ipi_deferred_policy(pset, dst, event);
        }
        break;
    case SCHED_IPI_EVENT_PREEMPT:
        /* For preemption, the default policy is to use deferred IPIs
         * for non-RT P-core preemption. Override that behavior if
         * sched_amp_pcores_preempt_immediate_ipi is set.
         */
        if (thread && thread->sched_pri < BASEPRI_RTQUEUES) {
            if (sched_amp_pcores_preempt_immediate_ipi && (pset == pcore_set)) {
                return dst_idle ? SCHED_IPI_IDLE : SCHED_IPI_IMMEDIATE;
            }
        }
        break;
    default:
        break;
    }
    /* Default back to the global policy for all other scenarios */
    return sched_ipi_policy(dst, thread, dst_idle, event);
}

/*
 * sched_amp_qos_max_parallelism()
 */
uint32_t
sched_amp_qos_max_parallelism(int qos, uint64_t options)
{
    uint32_t ecount = ecore_set->cpu_set_count;
    uint32_t pcount = pcore_set->cpu_set_count;

    if (options & QOS_PARALLELISM_REALTIME) {
        /* For realtime threads on AMP, we want to limit the width to
         * just the P-cores since we do not spill/rebalance RT threads.
         */
        return pcount;
    }

    /*
     * The default AMP scheduler policy is to run utility and bg
     * threads on E-cores only. Run-time policy adjustment unlocks the
     * ability of utility and bg threads to be scheduled based on
     * run-time conditions.
     */
    switch (qos) {
    case THREAD_QOS_UTILITY:
        return (os_atomic_load(&sched_perfctl_policy_util, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) ? ecount : (ecount + pcount);
    case THREAD_QOS_BACKGROUND:
    case THREAD_QOS_MAINTENANCE:
        return (os_atomic_load(&sched_perfctl_policy_bg, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) ? ecount : (ecount + pcount);
    default:
        return ecount + pcount;
    }
}
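
/*
 * Illustrative example (editor's note, not in the original source): on a
 * hypothetical system with 4 E-cores and 4 P-cores, the function above
 * reports 4 for QOS_PARALLELISM_REALTIME (P-cores only), 4 for utility/bg
 * while the corresponding sched_perfctl policy is still
 * SCHED_PERFCTL_POLICY_DEFAULT (E-cores only), and 8 otherwise.
 */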

pset_node_t
sched_amp_choose_node(thread_t thread)
{
    if (recommended_pset_type(thread) == PSET_AMP_P) {
        return pcore_set->node;
    } else {
        return ecore_set->node;
    }
}

/*
 * sched_amp_rt_runq()
 */
rt_queue_t
sched_amp_rt_runq(processor_set_t pset)
{
    return &pset->rt_runq;
}

/*
 * sched_amp_rt_init()
 */
void
sched_amp_rt_init(processor_set_t pset)
{
    pset_rt_init(pset);
}

/*
 * sched_amp_rt_queue_shutdown()
 */
void
sched_amp_rt_queue_shutdown(processor_t processor)
{
    processor_set_t pset = processor->processor_set;
    thread_t thread;
    queue_head_t tqueue;

    pset_lock(pset);

    /* We only need to migrate threads if this is the last active or last recommended processor in the pset */
    if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) {
        pset_unlock(pset);
        return;
    }

    queue_init(&tqueue);

    while (rt_runq_count(pset) > 0) {
        thread = qe_dequeue_head(&pset->rt_runq.queue, struct thread, runq_links);
        thread->runq = PROCESSOR_NULL;
        SCHED_STATS_RUNQ_CHANGE(&pset->rt_runq.runq_stats,
            os_atomic_load(&pset->rt_runq.count, relaxed));
        rt_runq_count_decr(pset);
        enqueue_tail(&tqueue, &thread->runq_links);
    }
    sched_update_pset_load_average(pset, 0);
    pset_unlock(pset);

    qe_foreach_element_safe(thread, &tqueue, runq_links) {
        remqueue(&thread->runq_links);

        thread_lock(thread);

        thread_setrun(thread, SCHED_TAILQ);

        thread_unlock(thread);
    }
}

/*
 * sched_amp_rt_runq_scan()
 *
 * Assumes RT lock is not held, and acquires splsched/rt_lock itself
 */
void
sched_amp_rt_runq_scan(sched_update_scan_context_t scan_context)
{
    thread_t thread;

    pset_node_t node = &pset_node0;
    processor_set_t pset = node->psets;

    spl_t s = splsched();
    do {
        while (pset != NULL) {
            pset_lock(pset);

            qe_foreach_element_safe(thread, &pset->rt_runq.queue, runq_links) {
                if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
                    scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
                }
            }

            pset_unlock(pset);

            pset = pset->pset_list;
        }
    } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));
    splx(s);
}

/*
 * sched_amp_rt_runq_count_sum()
 */
int64_t
sched_amp_rt_runq_count_sum(void)
{
    pset_node_t node = &pset_node0;
    processor_set_t pset = node->psets;
    int64_t count = 0;

    do {
        while (pset != NULL) {
            count += pset->rt_runq.runq_stats.count_sum;

            pset = pset->pset_list;
        }
    } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));

    return count;
}

#endif /* __AMP__ */