/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <sys/kdebug.h>
#include <kern/sched_amp_common.h>
#if __AMP__

/* Exported globals */
processor_set_t ecore_set = NULL;
processor_set_t pcore_set = NULL;

static struct processor_set pset1;
static struct pset_node pset_node1;

#if DEVELOPMENT || DEBUG
bool system_ecore_only = false;
#endif /* DEVELOPMENT || DEBUG */

/*
 * sched_amp_init()
 *
 * Initialize the pcore_set and ecore_set globals which describe the
 * P/E processor sets.
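 *
 * pset0 (the boot cluster) is initialized elsewhere; the second cluster is set up
 * here as pset1, and the two are assigned to pcore_set/ecore_set based on the
 * boot cluster type.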
 */
void
sched_amp_init(void)
{
    pset_init(&pset1, &pset_node1);
    pset_node1.psets = &pset1;
    pset_node0.node_list = &pset_node1;

    if (ml_get_boot_cluster() == CLUSTER_TYPE_P) {
        pcore_set = &pset0;
        ecore_set = &pset1;
    } else {
        ecore_set = &pset0;
        pcore_set = &pset1;
    }

    ecore_set->pset_cluster_type = PSET_AMP_E;
    ecore_set->pset_cluster_id = 0;

    pcore_set->pset_cluster_type = PSET_AMP_P;
    pcore_set->pset_cluster_id = 1;

#if !CONFIG_SCHED_CLUTCH
    /*
     * For non-clutch scheduler, allow system to be e-core only.
     * Clutch scheduler support for this feature needs to be implemented.
     */
#if DEVELOPMENT || DEBUG
    if (PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
        system_ecore_only = true;
    }
#endif /* DEVELOPMENT || DEBUG */

#endif /* !CONFIG_SCHED_CLUTCH */
    sched_timeshare_init();
}

/* Spill threshold load average is ncpus in pset + (sched_amp_spill_count / (1 << PSET_LOAD_FRACTIONAL_SHIFT)) */
int sched_amp_spill_count = 3;
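/* Fractional slack added to the steal threshold: sched_amp_spill_steal applies when a spill is pending, sched_amp_idle_steal otherwise */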
int sched_amp_idle_steal = 1;
int sched_amp_spill_steal = 1;

/*
 * We see performance gains from using immediate IPIs to P-cores to run
 * P-eligible threads, and fewer P-to-E migrations from using deferred IPIs
 * for spill.
 */
int sched_amp_spill_deferred_ipi = 1;
int sched_amp_pcores_preempt_immediate_ipi = 1;


/*
 * sched_amp_spill_threshold()
 *
 * Routine to calculate the spill threshold, which decides whether a cluster should spill.
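 *
 * The threshold is the recommended CPU count of the pset expressed in the same
 * PSET_LOAD_FRACTIONAL_SHIFT fixed-point format as sched_get_pset_load_average(),
 * plus sched_amp_spill_count fractional units of slack.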
 */
int
sched_amp_spill_threshold(processor_set_t pset)
{
    int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

    return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + sched_amp_spill_count;
}

/*
 * pset_signal_spill()
 *
 * Routine to signal a running/idle CPU to cause a spill onto that CPU.
 * Called with pset locked, returns unlocked
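 * Prefers an idle recommended CPU; otherwise signals a running CPU that is at
 * lower priority and has not already been asked to handle a spill.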
 */
void
pset_signal_spill(processor_set_t pset, int spilled_thread_priority)
{
    processor_t processor;
    sched_ipi_type_t ipi_type = SCHED_IPI_NONE;

    uint64_t idle_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE];
    for (int cpuid = lsb_first(idle_map); cpuid >= 0; cpuid = lsb_next(idle_map, cpuid)) {
        processor = processor_array[cpuid];
        if (bit_set_if_clear(pset->pending_spill_cpu_mask, processor->cpu_id)) {
            KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 0, 0, 0);

            processor->deadline = UINT64_MAX;
            pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);

            if (processor == current_processor()) {
                bit_set(pset->pending_AST_URGENT_cpu_mask, processor->cpu_id);
            } else {
                ipi_type = sched_ipi_action(processor, NULL, true, SCHED_IPI_EVENT_SPILL);
            }
            pset_unlock(pset);
            sched_ipi_perform(processor, ipi_type);
            return;
        }
    }

    processor_t ast_processor = NULL;
    uint64_t running_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        processor = processor_array[cpuid];
        if (processor->current_recommended_pset_type == PSET_AMP_P) {
            /* Already running a spilled P-core recommended thread */
            continue;
        }
        if (bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
            /* Already received a spill signal */
            continue;
        }
        if (processor->current_pri >= spilled_thread_priority) {
            /* Already running a higher or equal priority thread */
            continue;
        }

        /* Found a suitable processor */
        bit_set(pset->pending_spill_cpu_mask, processor->cpu_id);
        KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 1, 0, 0);
        if (processor == current_processor()) {
            ast_on(AST_PREEMPT);
        }
        ipi_type = sched_ipi_action(processor, NULL, false, SCHED_IPI_EVENT_SPILL);
        if (ipi_type != SCHED_IPI_NONE) {
            ast_processor = processor;
        }
        break;
    }

    pset_unlock(pset);
    sched_ipi_perform(ast_processor, ipi_type);
}

/*
 * pset_should_accept_spilled_thread()
 *
 * Routine to decide if pset should accept spilled threads.
 * This function must be safe to call (to use as a hint) without holding the pset lock.
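 * Accepts if the pset has an idle recommended CPU, or a recommended CPU running a
 * lower-priority thread that is not itself a spilled P-recommended thread.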
 */
bool
pset_should_accept_spilled_thread(processor_set_t pset, int spilled_thread_priority)
{
    if ((pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
        return true;
    }

    uint64_t cpu_map = (pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING]);

    for (int cpuid = lsb_first(cpu_map); cpuid >= 0; cpuid = lsb_next(cpu_map, cpuid)) {
        processor_t processor = processor_array[cpuid];

        if (processor->current_recommended_pset_type == PSET_AMP_P) {
            /* This processor is already running a spilled thread */
            continue;
        }

        if (processor->current_pri < spilled_thread_priority) {
            return true;
        }
    }

    return false;
}

/*
 * should_spill_to_ecores()
 *
 * Spill policy is implemented here
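 *
 * A P-recommended thread is spilled to the E cluster only if the E cores are
 * recommended, the thread is neither realtime nor P-core bound, no recommended
 * core in this cluster is idle, and the cluster load is at or above
 * sched_amp_spill_threshold() while the E cluster can accept the thread.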
 */
bool
should_spill_to_ecores(processor_set_t nset, thread_t thread)
{
    if (nset->pset_cluster_type == PSET_AMP_E) {
        /* Not relevant if ecores already preferred */
        return false;
    }

    if (!pset_is_recommended(ecore_set)) {
        /* E cores must be recommended */
        return false;
    }

#if !CONFIG_SCHED_CLUTCH
    /* Per-thread P-core scheduling support needs to be implemented for clutch scheduler */
    if (thread->sched_flags & TH_SFLAG_PCORE_ONLY) {
        return false;
    }
#endif /* !CONFIG_SCHED_CLUTCH */

    if (thread->sched_pri >= BASEPRI_RTQUEUES) {
        /* Never spill realtime threads */
        return false;
    }

    if ((nset->recommended_bitmask & nset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
        /* Don't spill if this cluster still has idle recommended cores */
        return false;
    }

    if ((sched_get_pset_load_average(nset) >= sched_amp_spill_threshold(nset)) &&      /* There is already a load on the P cores */
        pset_should_accept_spilled_thread(ecore_set, thread->sched_pri)) {             /* and the E cluster can accept the spilled thread */
        return true;
    }

    return false;
}

/*
 * sched_amp_check_spill()
 *
 * Routine to check if the thread should be spilled and signal the pset if needed.
 */
void
sched_amp_check_spill(processor_set_t pset, thread_t thread)
{
    /* pset is unlocked */

    /* Bound threads don't call this function */
    assert(thread->bound_processor == PROCESSOR_NULL);

    if (should_spill_to_ecores(pset, thread)) {
        pset_lock(ecore_set);

        pset_signal_spill(ecore_set, thread->sched_pri);
        /* returns with ecore_set unlocked */
    }
}

/*
 * sched_amp_steal_threshold()
 *
 * Routine to calculate the steal threshold
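 *
 * Like the spill threshold, this is the recommended CPU count of the pset in
 * PSET_LOAD_FRACTIONAL_SHIFT fixed-point, plus a small fractional slack
 * (sched_amp_spill_steal when a spill is pending, sched_amp_idle_steal otherwise).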
 */
int
sched_amp_steal_threshold(processor_set_t pset, bool spill_pending)
{
    int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

    return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + (spill_pending ? sched_amp_spill_steal : sched_amp_idle_steal);
}

/*
 * sched_amp_steal_thread_enabled()
 *
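 * Stealing is enabled only for the E cluster, and only while there are P cores
 * online to steal from.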
 */
bool
sched_amp_steal_thread_enabled(processor_set_t pset)
{
    return (pset->pset_cluster_type == PSET_AMP_E) && (pcore_set->online_processor_count > 0);
}

/*
 * sched_amp_balance()
 *
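 * If the current (idle) processor is a recommended P core, look for P-eligible
 * threads running on E cores and send them rebalance IPIs.
 *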
 * Invoked with pset locked, returns with pset unlocked
 */
void
sched_amp_balance(processor_t cprocessor, processor_set_t cpset)
{
    assert(cprocessor == current_processor());

    pset_unlock(cpset);

    if (cpset->pset_cluster_type == PSET_AMP_E || !cprocessor->is_recommended) {
        return;
    }

    /*
     * cprocessor is an idle, recommended P core processor.
     * Look for P-eligible threads that have spilled to an E core
     * and coax them to come back.
     */

    processor_set_t pset = ecore_set;

    pset_lock(pset);

    processor_t eprocessor;
    uint64_t ast_processor_map = 0;

    sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};
    uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        eprocessor = processor_array[cpuid];
        if ((eprocessor->current_pri < BASEPRI_RTQUEUES) &&
            (eprocessor->current_recommended_pset_type == PSET_AMP_P)) {
            ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
            if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
                bit_set(ast_processor_map, eprocessor->cpu_id);
                assert(eprocessor != cprocessor);
            }
        }
    }

    pset_unlock(pset);

    for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
        processor_t ast_processor = processor_array[cpuid];
        sched_ipi_perform(ast_processor, ipi_type[cpuid]);
    }
}

/*
 * Helper function for sched_amp_thread_group_recommendation_change()
 * Find all the cores in the pset running threads from the thread_group tg
 * and send them a rebalance interrupt.
 */
void
sched_amp_bounce_thread_group_from_ecores(processor_set_t pset, struct thread_group *tg)
{
    assert(pset->pset_cluster_type == PSET_AMP_E);
    uint64_t ast_processor_map = 0;
    sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};

    spl_t s = splsched();
    pset_lock(pset);

    uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        processor_t eprocessor = processor_array[cpuid];
        if (eprocessor->current_thread_group == tg) {
            ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
            if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
                bit_set(ast_processor_map, eprocessor->cpu_id);
            } else if (eprocessor == current_processor()) {
                ast_on(AST_PREEMPT);
                bit_set(pset->pending_AST_PREEMPT_cpu_mask, eprocessor->cpu_id);
            }
        }
    }

    KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_RECOMMENDATION_CHANGE) | DBG_FUNC_NONE, tg, ast_processor_map, 0, 0);

    pset_unlock(pset);

    for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
        processor_t ast_processor = processor_array[cpuid];
        sched_ipi_perform(ast_processor, ipi_type[cpuid]);
    }

    splx(s);
}

/*
 * sched_amp_ipi_policy()
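 *
 * AMP-specific IPI selection for spill and preemption events; all other cases
 * fall back to the global sched_ipi_policy().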
 */
sched_ipi_type_t
sched_amp_ipi_policy(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event)
{
    processor_set_t pset = dst->processor_set;
    assert(bit_test(pset->pending_AST_URGENT_cpu_mask, dst->cpu_id) == false);
    assert(dst != current_processor());

    boolean_t deferred_ipi_supported = false;
#if defined(CONFIG_SCHED_DEFERRED_AST)
    deferred_ipi_supported = true;
#endif /* CONFIG_SCHED_DEFERRED_AST */

    switch (event) {
    case SCHED_IPI_EVENT_SPILL:
        /* For spill events, use deferred IPIs if sched_amp_spill_deferred_ipi is set */
        if (deferred_ipi_supported && sched_amp_spill_deferred_ipi) {
            return sched_ipi_deferred_policy(pset, dst, event);
        }
        break;
    case SCHED_IPI_EVENT_PREEMPT:
        /* For preemption, the default policy is to use deferred IPIs
         * for Non-RT P-core preemption. Override that behavior if
         * sched_amp_pcores_preempt_immediate_ipi is set
         */
        if (thread && thread->sched_pri < BASEPRI_RTQUEUES) {
            if (sched_amp_pcores_preempt_immediate_ipi && (pset == pcore_set)) {
                return dst_idle ? SCHED_IPI_IDLE : SCHED_IPI_IMMEDIATE;
            }
        }
        break;
    default:
        break;
    }
    /* Default back to the global policy for all other scenarios */
    return sched_ipi_policy(dst, thread, dst_idle, event);
}

/*
 * sched_amp_qos_max_parallelism()
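 *
 * Report how many cores a thread of the given QoS class may use in parallel:
 * P cores only for realtime, E cores only for utility and below, and all
 * cores otherwise.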
 */
uint32_t
sched_amp_qos_max_parallelism(int qos, uint64_t options)
{
    uint32_t ecount = ecore_set->cpu_set_count;
    uint32_t pcount = pcore_set->cpu_set_count;

    if (options & QOS_PARALLELISM_REALTIME) {
        /* For realtime threads on AMP, limit the width to just the
         * P-cores since we do not spill/rebalance RT threads.
         */
        return pcount;
    }

    /*
     * The current AMP scheduler policy is to not run
     * background and utility threads on the P-cores.
     */
    switch (qos) {
    case THREAD_QOS_UTILITY:
    case THREAD_QOS_BACKGROUND:
    case THREAD_QOS_MAINTENANCE:
        return ecount;
    default:
        return ecount + pcount;
    }
}

/*
 * sched_amp_rt_runq()
 */
rt_queue_t
sched_amp_rt_runq(processor_set_t pset)
{
    return &pset->rt_runq;
}

/*
 * sched_amp_rt_init()
 */
void
sched_amp_rt_init(processor_set_t pset)
{
    pset_rt_init(pset);
}

/*
 * sched_amp_rt_queue_shutdown()
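 *
 * Move any realtime threads off this pset's RT run queue when the pset loses its
 * last active or recommended processor, and let thread_setrun() place them elsewhere.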
 */
void
sched_amp_rt_queue_shutdown(processor_t processor)
{
    processor_set_t pset = processor->processor_set;
    thread_t thread;
    queue_head_t tqueue;

    pset_lock(pset);

    /* We only need to migrate threads if this is the last active or last recommended processor in the pset */
    if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) {
        pset_unlock(pset);
        return;
    }

    queue_init(&tqueue);

    rt_lock_lock(pset);

    while (rt_runq_count(pset) > 0) {
        thread = qe_dequeue_head(&pset->rt_runq.queue, struct thread, runq_links);
        thread->runq = PROCESSOR_NULL;
        SCHED_STATS_RUNQ_CHANGE(&pset->rt_runq.runq_stats, pset->rt_runq.count);
        rt_runq_count_decr(pset);
        enqueue_tail(&tqueue, &thread->runq_links);
    }
    rt_lock_unlock(pset);
    sched_update_pset_load_average(pset);
    pset_unlock(pset);

    qe_foreach_element_safe(thread, &tqueue, runq_links) {
        remqueue(&thread->runq_links);

        thread_lock(thread);

        thread_setrun(thread, SCHED_TAILQ);

        thread_unlock(thread);
    }
}

/*
 * sched_amp_rt_runq_scan()
 *
 * Assumes RT lock is not held, and acquires splsched/rt_lock itself
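 * Walks the RT run queues of all psets and records the earliest make-runnable
 * time in the scan context.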
 */
void
sched_amp_rt_runq_scan(sched_update_scan_context_t scan_context)
{
    thread_t thread;

    pset_node_t node = &pset_node0;
    processor_set_t pset = node->psets;

    spl_t s = splsched();
    do {
        while (pset != NULL) {
            rt_lock_lock(pset);

            qe_foreach_element_safe(thread, &pset->rt_runq.queue, runq_links) {
                if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
                    scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
                }
            }

            rt_lock_unlock(pset);

            pset = pset->pset_list;
        }
    } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));
    splx(s);
}

/*
 * sched_amp_rt_runq_count_sum()
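 *
 * Sum the RT run-queue count statistics across all psets.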
 */
int64_t
sched_amp_rt_runq_count_sum(void)
{
    pset_node_t node = &pset_node0;
    processor_set_t pset = node->psets;
    int64_t count = 0;

    do {
        while (pset != NULL) {
            count += pset->rt_runq.runq_stats.count_sum;

            pset = pset->pset_list;
        }
    } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));

    return count;
}

#endif /* __AMP__ */