/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	processor.c: processor and processor_set manipulation routines.
 */
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/processor.h>
#include <mach/processor_info.h>
#include <mach/vm_param.h>
#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/ipc_host.h>
#include <kern/ipc_tt.h>
#include <kern/kalloc.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/timer.h>

#include <kperf/kperf.h>

#include <ipc/ipc_port.h>

#include <security/mac_mach_internal.h>

#if defined(CONFIG_XNUPOST)
#include <tests/xnupost.h>
#endif /* CONFIG_XNUPOST */

#include <mach/mach_host_server.h>
#include <mach/processor_set_server.h>
struct processor_set pset0;
struct pset_node pset_node0;

static SIMPLE_LOCK_DECLARE(pset_node_lock, 0);
LCK_GRP_DECLARE(pset_lck_grp, "pset");

queue_head_t terminated_tasks;  /* To be used ONLY for stackshot. */
queue_head_t corpse_tasks;
int terminated_tasks_count;
queue_head_t threads;

LCK_GRP_DECLARE(task_lck_grp, "task");
LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);

processor_t processor_list;
unsigned int processor_count;
static processor_t processor_list_tail;
SIMPLE_LOCK_DECLARE(processor_list_lock, 0);

uint32_t processor_avail_count;
uint32_t processor_avail_count_user;
uint32_t primary_processor_avail_count;
uint32_t primary_processor_avail_count_user;

struct processor PERCPU_DATA(processor);
processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
processor_set_t pset_array[MAX_PSETS] = { 0 };
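/*
 * Expiration handlers for the per-processor running timers, indexed by
 * running timer ID.  The static_assert below checks that every timer ID
 * has a handler.
 */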
static timer_call_func_t running_timer_funcs[] = {
    [RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
    [RUNNING_TIMER_KPERF] = kperf_timer_expire,
};
static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
    == RUNNING_TIMER_MAX, "missing running timer function");
#if defined(CONFIG_XNUPOST)
kern_return_t ipi_test(void);
extern void arm64_ipi_test(void);

kern_return_t
ipi_test(void)
{
#if __arm64__
    processor_t p;

    for (p = processor_list; p != NULL; p = p->processor_list) {
        thread_bind(p);
        thread_block(THREAD_CONTINUE_NULL);
        kprintf("Running IPI test on cpu %d\n", p->cpu_id);
        arm64_ipi_test();
    }

    /* unbind thread from specific cpu */
    thread_bind(PROCESSOR_NULL);
    thread_block(THREAD_CONTINUE_NULL);

    T_PASS("Done running IPI tests");
#else
    T_PASS("Unsupported platform. Not running IPI tests");
#endif /* __arm64__ */

    return KERN_SUCCESS;
}
#endif /* defined(CONFIG_XNUPOST) */
int sched_enable_smt = 1;
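/*
 *	processor_bootstrap:
 *
 *	Initialize the boot processor set and pset node, the global
 *	task/thread queues, and the master processor.
 */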
void
processor_bootstrap(void)
{
    pset_node0.psets = &pset0;
    pset_init(&pset0, &pset_node0);

    queue_init(&terminated_tasks);
    queue_init(&threads);
    queue_init(&corpse_tasks);

    processor_init(master_processor, master_cpu, &pset0);
}
/*
 *	Initialize the given processor for the cpu
 *	indicated by cpu_id, and assign to the
 *	specified processor set.
 */
void
processor_init(
    processor_t     processor,
    int             cpu_id,
    processor_set_t pset)
{
    assert(cpu_id < MAX_SCHED_CPUS);
    processor->cpu_id = cpu_id;

    if (processor != master_processor) {
        /* Scheduler state for master_processor initialized in sched_init() */
        SCHED(processor_init)(processor);
    }

    processor->state = PROCESSOR_OFF_LINE;
    processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
    processor->processor_set = pset;
    processor_state_update_idle(processor);
    processor->starting_pri = MINPRI;
    processor->quantum_end = UINT64_MAX;
    processor->deadline = UINT64_MAX;
    processor->first_timeslice = FALSE;
    processor->processor_offlined = false;
    processor->processor_primary = processor; /* no SMT relationship known at this point */
    processor->processor_secondary = NULL;
    processor->is_SMT = false;
    processor->is_recommended = true;
    processor->processor_self = IP_NULL;
    processor->processor_list = NULL;
    processor->must_idle = false;
    processor->running_timers_active = false;

    for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
        timer_call_setup(&processor->running_timers[i],
            running_timer_funcs[i], processor);
        running_timer_clear(processor, i);
    }

    timer_init(&processor->idle_state);
    timer_init(&processor->system_state);
    timer_init(&processor->user_state);

    bit_set(pset->cpu_bitmask, cpu_id);
    bit_set(pset->recommended_bitmask, cpu_id);
    bit_set(pset->primary_map, cpu_id);
    bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
    if (pset->cpu_set_count++ == 0) {
        pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
    } else {
        pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id : pset->cpu_set_low;
        pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id : pset->cpu_set_hi;
    }

    simple_lock(&processor_list_lock, LCK_GRP_NULL);
    if (processor_list == NULL) {
        processor_list = processor;
    } else {
        processor_list_tail->processor_list = processor;
    }
    processor_list_tail = processor;
    processor_array[cpu_id] = processor;
    simple_unlock(&processor_list_lock);
}
bool system_is_SMT = false;
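/*
 *	processor_set_primary:
 *
 *	Record the SMT relationship between a (possibly secondary)
 *	processor and its primary, and drop the secondary from the
 *	pset's primary map.
 */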
void
processor_set_primary(
    processor_t processor,
    processor_t primary)
{
    assert(processor->processor_primary == primary || processor->processor_primary == processor);
    /* Re-adjust primary point for this (possibly) secondary processor */
    processor->processor_primary = primary;

    assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
    if (primary != processor) {
        /* Link primary to secondary, assumes a 2-way SMT model
         * We'll need to move to a queue if any future architecture
         * requires otherwise.
         */
        assert(processor->processor_secondary == NULL);
        primary->processor_secondary = processor;
        /* Mark both processors as SMT siblings */
        primary->is_SMT = TRUE;
        processor->is_SMT = TRUE;

        if (!system_is_SMT) {
            system_is_SMT = true;
        }

        processor_set_t pset = processor->processor_set;
        spl_t s = splsched();
        bit_clear(pset->primary_map, processor->cpu_id);
        splx(s);
    }
}
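/*
 *	processor_pset:
 *
 *	Return the processor set that the processor is currently
 *	assigned to.
 */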
processor_set_t
processor_pset(
    processor_t processor)
{
    return processor->processor_set;
}
#if CONFIG_SCHED_EDGE

cluster_type_t
pset_type_for_id(uint32_t cluster_id)
{
    return pset_array[cluster_id]->pset_type;
}

/*
 * Processor foreign threads
 *
 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
 * if it's of a different type than its preferred cluster type (E/P). The bitmap should
 * be updated every time a new thread is assigned to run on a processor.
 *
 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads.
 */
void
processor_state_update_running_foreign(processor_t processor, thread_t thread)
{
    cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
    cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));

    /* Update the bitmap for the pset only for unbounded non-RT threads. */
    if ((processor->current_pri < BASEPRI_RTQUEUES) && (thread->bound_processor == PROCESSOR_NULL) && (current_processor_type != thread_type)) {
        bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
    } else {
        bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
    }
}
#else /* CONFIG_SCHED_EDGE */
void
processor_state_update_running_foreign(__unused processor_t processor, __unused thread_t thread)
{
}
#endif /* CONFIG_SCHED_EDGE */
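/*
 * processor_state_update_idle:
 *
 * Reset the tracked "currently running" state of a processor to the
 * idle defaults (idle priority, kernel SFI class, no thread group).
 */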
void
processor_state_update_idle(processor_t processor)
{
    processor->current_pri = IDLEPRI;
    processor->current_sfi_class = SFI_CLASS_KERNEL;
    processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
    processor->current_thread_group = NULL;
#endif
    processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
    processor->current_urgency = THREAD_URGENCY_NONE;
    processor->current_is_NO_SMT = false;
    processor->current_is_bound = false;
    os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
}
void
processor_state_update_from_thread(processor_t processor, thread_t thread)
{
    processor->current_pri = thread->sched_pri;
    processor->current_sfi_class = thread->sfi_class;
    processor->current_recommended_pset_type = recommended_pset_type(thread);
    processor_state_update_running_foreign(processor, thread);
    /* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
    sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
    os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);

#if CONFIG_THREAD_GROUPS
    processor->current_thread_group = thread_group_get(thread);
#endif
    processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
    processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
    processor->current_is_NO_SMT = thread_no_smt(thread);
    processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
}
void
processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
    pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, sched_bucket_t bucket)
{
    processor->current_pri = pri;
    processor->current_sfi_class = sfi_class;
    processor->current_recommended_pset_type = pset_type;
    processor->current_perfctl_class = perfctl_class;
    processor->current_urgency = urgency;
    os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
}
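/*
 *	pset_create:
 *
 *	Allocate and initialize a new processor set and link it into
 *	the given node's list of psets.
 */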
processor_set_t
pset_create(
    pset_node_t node)
{
    /* some schedulers do not support multiple psets */
    if (SCHED(multiple_psets_enabled) == FALSE) {
        return processor_pset(master_processor);
    }

    processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);

    if (pset != PROCESSOR_SET_NULL) {
        pset_init(pset, node);

        simple_lock(&pset_node_lock, LCK_GRP_NULL);

        prev = &node->psets;
        while (*prev != PROCESSOR_SET_NULL) {
            prev = &(*prev)->pset_list;
        }
        *prev = pset;

        simple_unlock(&pset_node_lock);
    }

    return pset;
}
/*
 * Find processor set with specified cluster_id.
 * Returns default_pset if not found.
 */
processor_set_t
pset_find(
    uint32_t cluster_id,
    processor_set_t default_pset)
{
    simple_lock(&pset_node_lock, LCK_GRP_NULL);
    pset_node_t node = &pset_node0;
    processor_set_t pset = NULL;

    do {
        pset = node->psets;
        while (pset != NULL) {
            if (pset->pset_cluster_id == cluster_id) {
                break;
            }
            pset = pset->pset_list;
        }
    } while (pset == NULL && (node = node->node_list) != NULL);
    simple_unlock(&pset_node_lock);
    if (pset == NULL) {
        pset = default_pset;
    }
    return pset;
}
/*
 *	Initialize the given processor_set structure.
 */
void
pset_init(
    processor_set_t pset,
    pset_node_t node)
{
    static uint32_t pset_count = 0;

    if (pset != &pset0) {
        /*
         * Scheduler runqueue initialization for non-boot psets.
         * This initialization for pset0 happens in sched_init().
         */
        SCHED(pset_init)(pset);
        SCHED(rt_init)(pset);
    }

    pset->online_processor_count = 0;
    pset->load_average = 0;
    bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
#if CONFIG_SCHED_EDGE
    bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
#endif /* CONFIG_SCHED_EDGE */
    pset->cpu_set_low = pset->cpu_set_hi = 0;
    pset->cpu_set_count = 0;
    pset->last_chosen = -1;
    pset->cpu_bitmask = 0;
    pset->recommended_bitmask = 0;
    pset->primary_map = 0;
    pset->realtime_map = 0;
    pset->cpu_running_foreign = 0;

    for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
        pset->cpu_state_map[i] = 0;
    }
    pset->pending_AST_URGENT_cpu_mask = 0;
    pset->pending_AST_PREEMPT_cpu_mask = 0;
#if defined(CONFIG_SCHED_DEFERRED_AST)
    pset->pending_deferred_AST_cpu_mask = 0;
#endif
    pset->pending_spill_cpu_mask = 0;
    pset_lock_init(pset);
    pset->pset_self = IP_NULL;
    pset->pset_name_self = IP_NULL;
    pset->pset_list = PROCESSOR_SET_NULL;

    /*
     * The pset_cluster_type & pset_cluster_id for all psets
     * on the platform are initialized as part of the SCHED(init).
     * That works well for small cluster platforms; for large cluster
     * count systems, it might be cleaner to do all the setup
     * dynamically in SCHED(pset_init).
     *
     * <Edge Multi-cluster Support Needed>
     */
    pset->is_SMT = false;

    simple_lock(&pset_node_lock, LCK_GRP_NULL);
    pset->pset_id = pset_count++;
    bit_set(node->pset_map, pset->pset_id);
    simple_unlock(&pset_node_lock);

    pset_array[pset->pset_id] = pset;
}
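/*
 *	processor_info_count:
 *
 *	Return the expected *count value (info array length) for the
 *	given processor info flavor.
 */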
kern_return_t
processor_info_count(
    processor_flavor_t flavor,
    mach_msg_type_number_t *count)
{
    switch (flavor) {
    case PROCESSOR_BASIC_INFO:
        *count = PROCESSOR_BASIC_INFO_COUNT;
        break;

    case PROCESSOR_CPU_LOAD_INFO:
        *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
        break;

    default:
        return cpu_info_count(flavor, count);
    }

    return KERN_SUCCESS;
}
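/*
 *	processor_info:
 *
 *	Return information about a processor for the requested flavor;
 *	basic info and CPU load ticks are handled here, anything else
 *	is passed through to the machine-dependent cpu_info().
 */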
kern_return_t
processor_info(
    processor_t processor,
    processor_flavor_t flavor,
    host_t *host,
    processor_info_t info,
    mach_msg_type_number_t *count)
{
    int cpu_id, state;
    kern_return_t result;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    cpu_id = processor->cpu_id;

    switch (flavor) {
    case PROCESSOR_BASIC_INFO:
    {
        processor_basic_info_t basic_info;

        if (*count < PROCESSOR_BASIC_INFO_COUNT) {
            return KERN_FAILURE;
        }

        basic_info = (processor_basic_info_t) info;
        basic_info->cpu_type = slot_type(cpu_id);
        basic_info->cpu_subtype = slot_subtype(cpu_id);
        state = processor->state;
        if (state == PROCESSOR_OFF_LINE
#if defined(__x86_64__)
            || !processor->is_recommended
#endif
            ) {
            basic_info->running = FALSE;
        } else {
            basic_info->running = TRUE;
        }
        basic_info->slot_num = cpu_id;
        if (processor == master_processor) {
            basic_info->is_master = TRUE;
        } else {
            basic_info->is_master = FALSE;
        }

        *count = PROCESSOR_BASIC_INFO_COUNT;
        *host = &realhost;

        return KERN_SUCCESS;
    }

    case PROCESSOR_CPU_LOAD_INFO:
    {
        processor_cpu_load_info_t cpu_load_info;
        timer_t idle_state;
        uint64_t idle_time_snapshot1, idle_time_snapshot2;
        uint64_t idle_time_tstamp1, idle_time_tstamp2;

        /*
         * We capture the accumulated idle time twice over
         * the course of this function, as well as the timestamps
         * when each was last updated. Since these are
         * all done using non-atomic racy mechanisms, the
         * most we can infer is whether values are stable.
         * timer_grab() is the only function that can be
         * used reliably on another processor's per-processor
         * data.
         */

        if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
            return KERN_FAILURE;
        }

        cpu_load_info = (processor_cpu_load_info_t) info;
        if (precise_user_kernel_time) {
            cpu_load_info->cpu_ticks[CPU_STATE_USER] =
                (uint32_t)(timer_grab(&processor->user_state) / hz_tick_interval);
            cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
                (uint32_t)(timer_grab(&processor->system_state) / hz_tick_interval);
        } else {
            uint64_t tval = timer_grab(&processor->user_state) +
                timer_grab(&processor->system_state);

            cpu_load_info->cpu_ticks[CPU_STATE_USER] = (uint32_t)(tval / hz_tick_interval);
            cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
        }

        idle_state = &processor->idle_state;
        idle_time_snapshot1 = timer_grab(idle_state);
        idle_time_tstamp1 = idle_state->tstamp;

        /*
         * Idle processors are not continually updating their
         * per-processor idle timer, so it may be extremely
         * out of date, resulting in an over-representation
         * of non-idle time between two measurement
         * intervals by e.g. top(1). If we are non-idle, or
         * have evidence that the timer is being updated
         * concurrently, we consider its value up-to-date.
         */
        if (processor->current_state != idle_state) {
            cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
        } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) ||
            (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))) {
            /* Idle timer is being updated concurrently, second stamp is good enough */
            cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                (uint32_t)(idle_time_snapshot2 / hz_tick_interval);
        } else {
            /*
             * Idle timer may be very stale. Fortunately we have established
             * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging
             */
            idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1;

            cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
        }

        cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

        *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
        *host = &realhost;

        return KERN_SUCCESS;
    }

    default:
        result = cpu_info(flavor, cpu_id, info, count);
        if (result == KERN_SUCCESS) {
            *host = &realhost;
        }

        return result;
    }
}
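/*
 *	processor_start:
 *
 *	Bring a processor online: create its idle and startup threads
 *	if needed, then ask the platform layer to start the cpu.
 */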
kern_return_t
processor_start(
    processor_t processor)
{
    processor_set_t pset;
    thread_t thread;
    kern_return_t result;
    spl_t s;

    if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (processor == master_processor) {
        processor_t prev;

        prev = thread_bind(processor);
        thread_block(THREAD_CONTINUE_NULL);

        result = cpu_start(processor->cpu_id);

        thread_bind(prev);

        return result;
    }

    bool scheduler_disable = false;

    if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
        if (cpu_can_exit(processor->cpu_id)) {
            return KERN_SUCCESS;
        }
        /*
         * This secondary SMT processor must start in order to service interrupts,
         * so instead it will be disabled at the scheduler level.
         */
        scheduler_disable = true;
    }

    ml_cpu_begin_state_transition(processor->cpu_id);

    pset = processor->processor_set;
    if (processor->state != PROCESSOR_OFF_LINE) {
        ml_cpu_end_state_transition(processor->cpu_id);

        return KERN_FAILURE;
    }

    pset_update_processor_state(pset, processor, PROCESSOR_START);

    /*
     * Create the idle processor thread.
     */
    if (processor->idle_thread == THREAD_NULL) {
        result = idle_thread_create(processor);
        if (result != KERN_SUCCESS) {
            pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
            ml_cpu_end_state_transition(processor->cpu_id);

            return result;
        }
    }

    /*
     * If there is no active thread, the processor
     * has never been started. Create a dedicated
     * start up thread.
     */
    if (processor->active_thread == THREAD_NULL &&
        processor->startup_thread == THREAD_NULL) {
        result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
        if (result != KERN_SUCCESS) {
            pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
            ml_cpu_end_state_transition(processor->cpu_id);

            return result;
        }

        s = splsched();
        thread_lock(thread);
        thread->bound_processor = processor;
        processor->startup_thread = thread;
        thread->state = TH_RUN;
        thread->last_made_runnable_time = mach_absolute_time();
        thread_unlock(thread);
        splx(s);

        thread_deallocate(thread);
    }

    if (processor->processor_self == IP_NULL) {
        ipc_processor_init(processor);
    }

    ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
    result = cpu_start(processor->cpu_id);
    if (result != KERN_SUCCESS) {
        pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
        ml_cpu_end_state_transition(processor->cpu_id);

        return result;
    }
    if (scheduler_disable) {
        assert(processor->processor_primary != processor);
        sched_processor_enable(processor, FALSE);
    }

    ipc_processor_enable(processor);
    ml_cpu_end_state_transition(processor->cpu_id);
    ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);

    return KERN_SUCCESS;
}
kern_return_t
processor_exit(
    processor_t processor)
{
    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    return processor_shutdown(processor);
}
kern_return_t
processor_start_from_user(
    processor_t processor)
{
    kern_return_t ret;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (!cpu_can_exit(processor->cpu_id)) {
        ret = sched_processor_enable(processor, TRUE);
    } else {
        ret = processor_start(processor);
    }

    return ret;
}
kern_return_t
processor_exit_from_user(
    processor_t processor)
{
    kern_return_t ret;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (!cpu_can_exit(processor->cpu_id)) {
        ret = sched_processor_enable(processor, FALSE);
    } else {
        ret = processor_shutdown(processor);
    }

    return ret;
}
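/*
 *	enable_smt_processors:
 *
 *	Start or shut down all secondary SMT processors, then verify the
 *	resulting logical cpu count against host_info().
 */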
kern_return_t
enable_smt_processors(bool enable)
{
    if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
        /* Not an SMT system */
        return KERN_INVALID_ARGUMENT;
    }

    int ncpus = machine_info.logical_cpu_max;

    for (int i = 1; i < ncpus; i++) {
        processor_t processor = processor_array[i];

        if (processor->processor_primary != processor) {
            if (enable) {
                processor_start_from_user(processor);
            } else { /* Disable */
                processor_exit_from_user(processor);
            }
        }
    }

    host_basic_info_data_t hinfo;
    mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
    kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
    if (kret != KERN_SUCCESS) {
        return KERN_FAILURE;
    }

    if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
        return KERN_FAILURE;
    }

    if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
        return KERN_FAILURE;
    }

    return KERN_SUCCESS;
}
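/*
 *	processor_control:
 *
 *	Pass a machine-dependent control command through to cpu_control()
 *	for the given processor.
 */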
kern_return_t
processor_control(
    processor_t processor,
    processor_info_t info,
    mach_msg_type_number_t count)
{
    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    return cpu_control(processor->cpu_id, info, count);
}
kern_return_t
processor_set_create(
    __unused host_t host,
    __unused processor_set_t *new_set,
    __unused processor_set_t *new_name)
{
    return KERN_FAILURE;
}

kern_return_t
processor_set_destroy(
    __unused processor_set_t pset)
{
    return KERN_FAILURE;
}
kern_return_t
processor_get_assignment(
    processor_t processor,
    processor_set_t *pset)
{
    int state;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    state = processor->state;
    if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) {
        return KERN_FAILURE;
    }

    *pset = &pset0;

    return KERN_SUCCESS;
}
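/*
 *	processor_set_info:
 *
 *	Return information about the default processor set for the
 *	requested flavor (basic info, default scheduling policy bases
 *	and limits, and enabled policies).
 */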
kern_return_t
processor_set_info(
    processor_set_t pset,
    int flavor,
    host_t *host,
    processor_set_info_t info,
    mach_msg_type_number_t *count)
{
    if (pset == PROCESSOR_SET_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (flavor == PROCESSOR_SET_BASIC_INFO) {
        processor_set_basic_info_t basic_info;

        if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
            return KERN_FAILURE;
        }

        basic_info = (processor_set_basic_info_t) info;
#if defined(__x86_64__)
        basic_info->processor_count = processor_avail_count_user;
#else
        basic_info->processor_count = processor_avail_count;
#endif
        basic_info->default_policy = POLICY_TIMESHARE;

        *count = PROCESSOR_SET_BASIC_INFO_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
        policy_timeshare_base_t ts_base;

        if (*count < POLICY_TIMESHARE_BASE_COUNT) {
            return KERN_FAILURE;
        }

        ts_base = (policy_timeshare_base_t) info;
        ts_base->base_priority = BASEPRI_DEFAULT;

        *count = POLICY_TIMESHARE_BASE_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
        policy_fifo_base_t fifo_base;

        if (*count < POLICY_FIFO_BASE_COUNT) {
            return KERN_FAILURE;
        }

        fifo_base = (policy_fifo_base_t) info;
        fifo_base->base_priority = BASEPRI_DEFAULT;

        *count = POLICY_FIFO_BASE_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
        policy_rr_base_t rr_base;

        if (*count < POLICY_RR_BASE_COUNT) {
            return KERN_FAILURE;
        }

        rr_base = (policy_rr_base_t) info;
        rr_base->base_priority = BASEPRI_DEFAULT;
        rr_base->quantum = 1;

        *count = POLICY_RR_BASE_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
        policy_timeshare_limit_t ts_limit;

        if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
            return KERN_FAILURE;
        }

        ts_limit = (policy_timeshare_limit_t) info;
        ts_limit->max_priority = MAXPRI_KERNEL;

        *count = POLICY_TIMESHARE_LIMIT_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
        policy_fifo_limit_t fifo_limit;

        if (*count < POLICY_FIFO_LIMIT_COUNT) {
            return KERN_FAILURE;
        }

        fifo_limit = (policy_fifo_limit_t) info;
        fifo_limit->max_priority = MAXPRI_KERNEL;

        *count = POLICY_FIFO_LIMIT_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
        policy_rr_limit_t rr_limit;

        if (*count < POLICY_RR_LIMIT_COUNT) {
            return KERN_FAILURE;
        }

        rr_limit = (policy_rr_limit_t) info;
        rr_limit->max_priority = MAXPRI_KERNEL;

        *count = POLICY_RR_LIMIT_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
        int *enabled;

        if (*count < (sizeof(*enabled) / sizeof(int))) {
            return KERN_FAILURE;
        }

        enabled = (int *) info;
        *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;

        *count = sizeof(*enabled) / sizeof(int);
        *host = &realhost;
        return KERN_SUCCESS;
    }

    return KERN_INVALID_ARGUMENT;
}
/*
 *	processor_set_statistics
 *
 *	Returns scheduling statistics for a processor set.
 */
kern_return_t
processor_set_statistics(
    processor_set_t pset,
    int flavor,
    processor_set_info_t info,
    mach_msg_type_number_t *count)
{
    if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
        return KERN_INVALID_PROCESSOR_SET;
    }

    if (flavor == PROCESSOR_SET_LOAD_INFO) {
        processor_set_load_info_t load_info;

        if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
            return KERN_FAILURE;
        }

        load_info = (processor_set_load_info_t) info;

        load_info->mach_factor = sched_mach_factor;
        load_info->load_average = sched_load_average;

        load_info->task_count = tasks_count;
        load_info->thread_count = threads_count;

        *count = PROCESSOR_SET_LOAD_INFO_COUNT;
        return KERN_SUCCESS;
    }

    return KERN_INVALID_ARGUMENT;
}
/*
 *	processor_set_max_priority:
 *
 *	Specify max priority permitted on processor set. This affects
 *	newly created and assigned threads. Optionally change existing
 *	ones.
 */
kern_return_t
processor_set_max_priority(
    __unused processor_set_t pset,
    __unused int max_priority,
    __unused boolean_t change_threads)
{
    return KERN_INVALID_ARGUMENT;
}

/*
 *	processor_set_policy_enable:
 *
 *	Allow indicated policy on processor set.
 */
kern_return_t
processor_set_policy_enable(
    __unused processor_set_t pset,
    __unused int policy)
{
    return KERN_INVALID_ARGUMENT;
}

/*
 *	processor_set_policy_disable:
 *
 *	Forbid indicated policy on processor set. Time sharing cannot
 *	be forbidden.
 */
kern_return_t
processor_set_policy_disable(
    __unused processor_set_t pset,
    __unused int policy,
    __unused boolean_t change_threads)
{
    return KERN_INVALID_ARGUMENT;
}
/*
 *	processor_set_things:
 *
 *	Common internals for processor_set_{threads,tasks}
 */
static kern_return_t
processor_set_things(
    processor_set_t pset,
    void **thing_list,
    mach_msg_type_number_t *count,
    int type)
{
    unsigned int i;
    task_t task;
    thread_t thread;

    task_t *task_list;
    unsigned int actual_tasks;
    vm_size_t task_size, task_size_needed;

    thread_t *thread_list;
    unsigned int actual_threads;
    vm_size_t thread_size, thread_size_needed;

    void *addr, *newaddr;
    vm_size_t size, size_needed;

    if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
        return KERN_INVALID_ARGUMENT;
    }

    task_size = 0;
    task_size_needed = 0;
    task_list = NULL;
    actual_tasks = 0;

    thread_size = 0;
    thread_size_needed = 0;
    thread_list = NULL;
    actual_threads = 0;

    for (;;) {
        lck_mtx_lock(&tasks_threads_lock);

        /* do we have the memory we need? */
        if (type == PSET_THING_THREAD) {
            thread_size_needed = threads_count * sizeof(void *);
        }
        task_size_needed = tasks_count * sizeof(void *);

        if (task_size_needed <= task_size &&
            thread_size_needed <= thread_size) {
            break;
        }

        /* unlock and allocate more memory */
        lck_mtx_unlock(&tasks_threads_lock);

        /* grow task array */
        if (task_size_needed > task_size) {
            if (task_size != 0) {
                kfree(task_list, task_size);
            }

            assert(task_size_needed > 0);
            task_size = task_size_needed;

            task_list = (task_t *)kalloc(task_size);
            if (task_list == NULL) {
                if (thread_size != 0) {
                    kfree(thread_list, thread_size);
                }
                return KERN_RESOURCE_SHORTAGE;
            }
        }

        /* grow thread array */
        if (thread_size_needed > thread_size) {
            if (thread_size != 0) {
                kfree(thread_list, thread_size);
            }

            assert(thread_size_needed > 0);
            thread_size = thread_size_needed;

            thread_list = (thread_t *)kalloc(thread_size);
            if (thread_list == 0) {
                if (task_size != 0) {
                    kfree(task_list, task_size);
                }
                return KERN_RESOURCE_SHORTAGE;
            }
        }
    }

    /* OK, have memory and the list locked */

    /* If we need it, get the thread list */
    if (type == PSET_THING_THREAD) {
        for (thread = (thread_t)queue_first(&threads);
            !queue_end(&threads, (queue_entry_t)thread);
            thread = (thread_t)queue_next(&thread->threads)) {
#if defined(SECURE_KERNEL)
            if (thread->task != kernel_task) {
#endif
            thread_reference_internal(thread);
            thread_list[actual_threads++] = thread;
#if defined(SECURE_KERNEL)
            }
#endif
        }
    }

    /* get a list of the tasks */
    for (task = (task_t)queue_first(&tasks);
        !queue_end(&tasks, (queue_entry_t)task);
        task = (task_t)queue_next(&task->tasks)) {
#if defined(SECURE_KERNEL)
        if (task != kernel_task) {
#endif
        task_reference_internal(task);
        task_list[actual_tasks++] = task;
#if defined(SECURE_KERNEL)
        }
#endif
    }

    lck_mtx_unlock(&tasks_threads_lock);

#if CONFIG_MACF
    unsigned int j, used;

    /* for each task, make sure we are allowed to examine it */
    for (i = used = 0; i < actual_tasks; i++) {
        if (mac_task_check_expose_task(task_list[i])) {
            task_deallocate(task_list[i]);
            continue;
        }
        task_list[used++] = task_list[i];
    }
    actual_tasks = used;
    task_size_needed = actual_tasks * sizeof(void *);

    if (type == PSET_THING_THREAD) {
        /* for each thread (if any), make sure its task is in the allowed list */
        for (i = used = 0; i < actual_threads; i++) {
            boolean_t found_task = FALSE;

            task = thread_list[i]->task;
            for (j = 0; j < actual_tasks; j++) {
                if (task_list[j] == task) {
                    found_task = TRUE;
                    break;
                }
            }
            if (found_task) {
                thread_list[used++] = thread_list[i];
            } else {
                thread_deallocate(thread_list[i]);
            }
        }
        actual_threads = used;
        thread_size_needed = actual_threads * sizeof(void *);

        /* done with the task list */
        for (i = 0; i < actual_tasks; i++) {
            task_deallocate(task_list[i]);
        }
        kfree(task_list, task_size);
        task_size = 0;
        actual_tasks = 0;
        task_list = NULL;
    }
#endif /* CONFIG_MACF */

    if (type == PSET_THING_THREAD) {
        if (actual_threads == 0) {
            /* no threads available to return */
            assert(task_size == 0);
            if (thread_size != 0) {
                kfree(thread_list, thread_size);
            }
            *thing_list = NULL;
            *count = 0;

            return KERN_SUCCESS;
        }
        size_needed = actual_threads * sizeof(void *);
        size = thread_size;
        addr = thread_list;
    } else {
        if (actual_tasks == 0) {
            /* no tasks available to return */
            assert(thread_size == 0);
            if (task_size != 0) {
                kfree(task_list, task_size);
            }
            *thing_list = NULL;
            *count = 0;

            return KERN_SUCCESS;
        }
        size_needed = actual_tasks * sizeof(void *);
        size = task_size;
        addr = task_list;
    }

    /* if we allocated too much, must copy */
    if (size_needed < size) {
        newaddr = kalloc(size_needed);
        if (newaddr == 0) {
            for (i = 0; i < actual_tasks; i++) {
                if (type == PSET_THING_THREAD) {
                    thread_deallocate(thread_list[i]);
                } else {
                    task_deallocate(task_list[i]);
                }
            }
            kfree(addr, size);

            return KERN_RESOURCE_SHORTAGE;
        }

        bcopy((void *) addr, (void *) newaddr, size_needed);
        kfree(addr, size);

        addr = newaddr;
        size = size_needed;
    }

    *thing_list = (void **)addr;
    *count = (unsigned int)size / sizeof(void *);

    return KERN_SUCCESS;
}
/*
 *	processor_set_tasks:
 *
 *	List all tasks in the processor set.
 */
static kern_return_t
processor_set_tasks_internal(
    processor_set_t pset,
    task_array_t *task_list,
    mach_msg_type_number_t *count,
    mach_task_flavor_t flavor)
{
    kern_return_t ret;
    mach_msg_type_number_t i;

    ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK);
    if (ret != KERN_SUCCESS) {
        return ret;
    }

    /* do the conversion that Mig should handle */
    switch (flavor) {
    case TASK_FLAVOR_CONTROL:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
        }
        break;
    case TASK_FLAVOR_READ:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
        }
        break;
    case TASK_FLAVOR_INSPECT:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
        }
        break;
    case TASK_FLAVOR_NAME:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
        }
        break;
    default:
        return KERN_INVALID_ARGUMENT;
    }

    return KERN_SUCCESS;
}
kern_return_t
processor_set_tasks(
    processor_set_t pset,
    task_array_t *task_list,
    mach_msg_type_number_t *count)
{
    return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
}
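/*
 * Rough user-space sketch (illustrative only) of how a privileged caller
 * typically reaches processor_set_tasks() via MIG, assuming it already
 * holds the host privileged port (host_priv): obtain the default pset
 * name, convert it to a control port, then request the task list.
 *
 *	processor_set_name_t name;
 *	processor_set_t pset;
 *	task_array_t tasks;
 *	mach_msg_type_number_t task_count;
 *
 *	processor_set_default(mach_host_self(), &name);
 *	host_processor_set_priv(host_priv, name, &pset);
 *	processor_set_tasks(pset, &tasks, &task_count);
 */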
/*
 *	processor_set_tasks_with_flavor:
 *
 *	Based on flavor, return task/inspect/read port to all tasks in the processor set.
 */
kern_return_t
processor_set_tasks_with_flavor(
    processor_set_t pset,
    mach_task_flavor_t flavor,
    task_array_t *task_list,
    mach_msg_type_number_t *count)
{
    switch (flavor) {
    case TASK_FLAVOR_CONTROL:
    case TASK_FLAVOR_READ:
    case TASK_FLAVOR_INSPECT:
    case TASK_FLAVOR_NAME:
        return processor_set_tasks_internal(pset, task_list, count, flavor);
    default:
        return KERN_INVALID_ARGUMENT;
    }
}
/*
 *	processor_set_threads:
 *
 *	List all threads in the processor set.
 */
#if defined(SECURE_KERNEL)
kern_return_t
processor_set_threads(
    __unused processor_set_t pset,
    __unused thread_array_t *thread_list,
    __unused mach_msg_type_number_t *count)
{
    return KERN_FAILURE;
}
#elif !defined(XNU_TARGET_OS_OSX)
kern_return_t
processor_set_threads(
    __unused processor_set_t pset,
    __unused thread_array_t *thread_list,
    __unused mach_msg_type_number_t *count)
{
    return KERN_NOT_SUPPORTED;
}
#else
kern_return_t
processor_set_threads(
    processor_set_t pset,
    thread_array_t *thread_list,
    mach_msg_type_number_t *count)
{
    kern_return_t ret;
    mach_msg_type_number_t i;

    ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD);
    if (ret != KERN_SUCCESS) {
        return ret;
    }

    /* do the conversion that Mig should handle */
    for (i = 0; i < *count; i++) {
        (*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
    }
    return KERN_SUCCESS;
}
#endif
/*
 *	processor_set_policy_control
 *
 *	Controls the scheduling attributes governing the processor set.
 *	Allows control of enabled policies, and per-policy base and limit
 *	priorities.
 */
kern_return_t
processor_set_policy_control(
    __unused processor_set_t pset,
    __unused int flavor,
    __unused processor_set_info_t policy_info,
    __unused mach_msg_type_number_t count,
    __unused boolean_t change)
{
    return KERN_INVALID_ARGUMENT;
}
#undef pset_deallocate
void pset_deallocate(processor_set_t pset);
void
pset_deallocate(
    __unused processor_set_t pset)
{
    return;
}

#undef pset_reference
void pset_reference(processor_set_t pset);
void
pset_reference(
    __unused processor_set_t pset)
{
    return;
}
#if CONFIG_THREAD_GROUPS
pset_cluster_type_t
thread_group_pset_recommendation(__unused struct thread_group *tg, __unused cluster_type_t recommendation)
{
#if __AMP__
    switch (recommendation) {
    case CLUSTER_TYPE_SMP:
    default:
        /*
         * In case of SMP recommendations, check if the thread
         * group has special flags which restrict it to the E
         * cluster.
         */
        if (thread_group_smp_restricted(tg)) {
            return PSET_AMP_E;
        }
        return PSET_AMP_P;
    case CLUSTER_TYPE_E:
        return PSET_AMP_E;
    case CLUSTER_TYPE_P:
        return PSET_AMP_P;
    }
#else /* __AMP__ */
    return PSET_SMP;
#endif /* __AMP__ */
}
#endif /* CONFIG_THREAD_GROUPS */
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
#if CONFIG_THREAD_GROUPS && __AMP__
    if (thread == THREAD_NULL) {
        return PSET_AMP_E;
    }

    if (thread->sched_flags & TH_SFLAG_ECORE_ONLY) {
        return PSET_AMP_E;
    } else if (thread->sched_flags & TH_SFLAG_PCORE_ONLY) {
        return PSET_AMP_P;
    }

    if (thread->base_pri <= MAXPRI_THROTTLE) {
        if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
            return PSET_AMP_E;
        }
    } else if (thread->base_pri <= BASEPRI_UTILITY) {
        if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
            return PSET_AMP_E;
        }
    }

#if DEVELOPMENT || DEBUG
    extern bool system_ecore_only;
    extern processor_set_t pcore_set;
    if (system_ecore_only) {
        if (thread->task->pset_hint == pcore_set) {
            return PSET_AMP_P;
        }
        return PSET_AMP_E;
    }
#endif

    struct thread_group *tg = thread_group_get(thread);
    cluster_type_t recommendation = thread_group_recommendation(tg);
    switch (recommendation) {
    case CLUSTER_TYPE_SMP:
    default:
        if (thread->task == kernel_task) {
            return PSET_AMP_E;
        }
        return PSET_AMP_P;
    case CLUSTER_TYPE_E:
        return PSET_AMP_E;
    case CLUSTER_TYPE_P:
        return PSET_AMP_P;
    }
#else
    (void)thread;
    return PSET_SMP;
#endif
}
#if CONFIG_THREAD_GROUPS && __AMP__

void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
    sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;

    KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

    switch (perfctl_class) {
    case PERFCONTROL_CLASS_UTILITY:
        os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
        break;
    case PERFCONTROL_CLASS_BACKGROUND:
        os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
        break;
    default:
        panic("perfctl_class invalid");
        break;
    }
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
}

#endif /* defined(__arm64__) */