/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/machine_cpu.h>
#include <arm/cpu_internal.h>
#include <arm/cpuid.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <arm/machdep_call.h>
#include <arm/machine_routines.h>
#include <arm/rtclock.h>
#include <kern/machine.h>
#include <kern/thread.h>
#include <kern/thread_group.h>
#include <kern/policy_internal.h>
#include <kern/startup.h>
#include <machine/config.h>
#include <machine/atomic.h>
#include <pexpert/pexpert.h>

#if MONOTONIC
#include <kern/monotonic.h>
#include <machine/monotonic.h>
#endif /* MONOTONIC */

#include <mach/machine.h>

#if !HAS_CONTINUOUS_HWCLOCK
extern uint64_t mach_absolutetime_asleep;
#else
extern uint64_t wake_abstime;
static uint64_t wake_conttime = UINT64_MAX;
#endif

extern volatile uint32_t debug_enabled;

static int max_cpus_initialized = 0;
#define MAX_CPUS_SET    0x1
#define MAX_CPUS_WAIT   0x2

LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
uint32_t lockdown_done = 0;
boolean_t is_clock_configured = FALSE;

static void
sched_perfcontrol_oncore_default(perfcontrol_state_t new_thread_state __unused, going_on_core_t on __unused)
{
}

static void
sched_perfcontrol_switch_default(perfcontrol_state_t old_thread_state __unused, perfcontrol_state_t new_thread_state __unused)
{
}

static void
sched_perfcontrol_offcore_default(perfcontrol_state_t old_thread_state __unused, going_off_core_t off __unused, boolean_t thread_terminating __unused)
{
}

static void
sched_perfcontrol_thread_group_default(thread_group_data_t data __unused)
{
}

static void
sched_perfcontrol_max_runnable_latency_default(perfcontrol_max_runnable_latency_t latencies __unused)
{
}

static void
sched_perfcontrol_work_interval_notify_default(perfcontrol_state_t thread_state __unused,
    perfcontrol_work_interval_t work_interval __unused)
{
}

static void
sched_perfcontrol_work_interval_ctl_default(perfcontrol_state_t thread_state __unused,
    perfcontrol_work_interval_instance_t instance __unused)
{
}

static void
sched_perfcontrol_deadline_passed_default(__unused uint64_t deadline)
{
}

static void
sched_perfcontrol_csw_default(
    __unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
    __unused uint32_t flags, __unused struct perfcontrol_thread_data *offcore,
    __unused struct perfcontrol_thread_data *oncore,
    __unused struct perfcontrol_cpu_counters *cpu_counters, __unused void *unused)
{
}

static void
sched_perfcontrol_state_update_default(
    __unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
    __unused uint32_t flags, __unused struct perfcontrol_thread_data *thr_data,
    __unused void *unused)
{
}

static void
sched_perfcontrol_thread_group_blocked_default(
    __unused thread_group_data_t blocked_tg, __unused thread_group_data_t blocking_tg,
    __unused uint32_t flags, __unused perfcontrol_state_t blocked_thr_state)
{
}

static void
sched_perfcontrol_thread_group_unblocked_default(
    __unused thread_group_data_t unblocked_tg, __unused thread_group_data_t unblocking_tg,
    __unused uint32_t flags, __unused perfcontrol_state_t unblocked_thr_state)
{
}

sched_perfcontrol_offcore_t sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
sched_perfcontrol_context_switch_t sched_perfcontrol_switch = sched_perfcontrol_switch_default;
sched_perfcontrol_oncore_t sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
sched_perfcontrol_thread_group_init_t sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
sched_perfcontrol_thread_group_deinit_t sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
sched_perfcontrol_thread_group_flags_update_t sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
sched_perfcontrol_max_runnable_latency_t sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
sched_perfcontrol_work_interval_notify_t sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
sched_perfcontrol_work_interval_ctl_t sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
sched_perfcontrol_deadline_passed_t sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
sched_perfcontrol_csw_t sched_perfcontrol_csw = sched_perfcontrol_csw_default;
sched_perfcontrol_state_update_t sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
sched_perfcontrol_thread_group_blocked_t sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
sched_perfcontrol_thread_group_unblocked_t sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
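
/*
 * Register (or, when callbacks == NULL, unregister) the performance-controller
 * callouts. Each callback slot falls back to its no-op default when the supplied
 * callback table omits it or is too old a version to contain it.
 */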
void
sched_perfcontrol_register_callbacks(sched_perfcontrol_callbacks_t callbacks, unsigned long size_of_state)
{
    assert(callbacks == NULL || callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_2);

    if (size_of_state > sizeof(struct perfcontrol_state)) {
        panic("%s: Invalid required state size %lu", __FUNCTION__, size_of_state);
    }

    if (callbacks) {
#if CONFIG_THREAD_GROUPS
        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_3) {
            if (callbacks->thread_group_init != NULL) {
                sched_perfcontrol_thread_group_init = callbacks->thread_group_init;
            } else {
                sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
            }
            if (callbacks->thread_group_deinit != NULL) {
                sched_perfcontrol_thread_group_deinit = callbacks->thread_group_deinit;
            } else {
                sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
            }
            // tell CLPC about existing thread groups
            thread_group_resync(TRUE);
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_6) {
            if (callbacks->thread_group_flags_update != NULL) {
                sched_perfcontrol_thread_group_flags_update = callbacks->thread_group_flags_update;
            } else {
                sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
            }
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_8) {
            if (callbacks->thread_group_blocked != NULL) {
                sched_perfcontrol_thread_group_blocked = callbacks->thread_group_blocked;
            } else {
                sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
            }

            if (callbacks->thread_group_unblocked != NULL) {
                sched_perfcontrol_thread_group_unblocked = callbacks->thread_group_unblocked;
            } else {
                sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
            }
        }
#endif /* CONFIG_THREAD_GROUPS */

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_7) {
            if (callbacks->work_interval_ctl != NULL) {
                sched_perfcontrol_work_interval_ctl = callbacks->work_interval_ctl;
            } else {
                sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
            }
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_5) {
            if (callbacks->csw != NULL) {
                sched_perfcontrol_csw = callbacks->csw;
            } else {
                sched_perfcontrol_csw = sched_perfcontrol_csw_default;
            }

            if (callbacks->state_update != NULL) {
                sched_perfcontrol_state_update = callbacks->state_update;
            } else {
                sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
            }
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_4) {
            if (callbacks->deadline_passed != NULL) {
                sched_perfcontrol_deadline_passed = callbacks->deadline_passed;
            } else {
                sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
            }
        }

        if (callbacks->offcore != NULL) {
            sched_perfcontrol_offcore = callbacks->offcore;
        } else {
            sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
        }

        if (callbacks->context_switch != NULL) {
            sched_perfcontrol_switch = callbacks->context_switch;
        } else {
            sched_perfcontrol_switch = sched_perfcontrol_switch_default;
        }

        if (callbacks->oncore != NULL) {
            sched_perfcontrol_oncore = callbacks->oncore;
        } else {
            sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
        }

        if (callbacks->max_runnable_latency != NULL) {
            sched_perfcontrol_max_runnable_latency = callbacks->max_runnable_latency;
        } else {
            sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
        }

        if (callbacks->work_interval_notify != NULL) {
            sched_perfcontrol_work_interval_notify = callbacks->work_interval_notify;
        } else {
            sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
        }
    } else {
        /* reset to defaults */
#if CONFIG_THREAD_GROUPS
        thread_group_resync(FALSE);
#endif
        sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
        sched_perfcontrol_switch = sched_perfcontrol_switch_default;
        sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
        sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
        sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
        sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
        sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
        sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
        sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
        sched_perfcontrol_csw = sched_perfcontrol_csw_default;
        sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
        sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
        sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
    }
}
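
/*
 * Fill in the per-thread data block handed to the performance controller:
 * perfcontrol class, thread ID, thread-group information (when
 * CONFIG_THREAD_GROUPS), and the scheduling latency observed at the same
 * base priority.
 */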
static void
machine_switch_populate_perfcontrol_thread_data(struct perfcontrol_thread_data *data,
    thread_t thread,
    uint64_t same_pri_latency)
{
    bzero(data, sizeof(struct perfcontrol_thread_data));
    data->perfctl_class = thread_get_perfcontrol_class(thread);
    data->energy_estimate_nj = 0;
    data->thread_id = thread->thread_id;
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg = thread_group_get(thread);
    data->thread_group_id = thread_group_get_id(tg);
    data->thread_group_data = thread_group_get_machine_data(tg);
#endif
    data->scheduling_latency_at_same_basepri = same_pri_latency;
    data->perfctl_state = FIND_PERFCONTROL_STATE(thread);
}

static void
machine_switch_populate_perfcontrol_cpu_counters(struct perfcontrol_cpu_counters *cpu_counters)
{
#if MONOTONIC
    mt_perfcontrol(&cpu_counters->instructions, &cpu_counters->cycles);
#else /* MONOTONIC */
    cpu_counters->instructions = 0;
    cpu_counters->cycles = 0;
#endif /* !MONOTONIC */
}
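
/*
 * Optional instrumentation: accumulate the cycle/instruction cost of each class
 * of performance-controller callout so that average overheads can be queried.
 */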
int perfcontrol_callout_stats_enabled = 0;
static _Atomic uint64_t perfcontrol_callout_stats[PERFCONTROL_CALLOUT_MAX][PERFCONTROL_STAT_MAX];
static _Atomic uint64_t perfcontrol_callout_count[PERFCONTROL_CALLOUT_MAX];

#if MONOTONIC
static inline bool
perfcontrol_callout_counters_begin(uint64_t *counters)
{
    if (!perfcontrol_callout_stats_enabled) {
        return false;
    }
    mt_fixed_counts(counters);
    return true;
}

static inline void
perfcontrol_callout_counters_end(uint64_t *start_counters,
    perfcontrol_callout_type_t type)
{
    uint64_t end_counters[MT_CORE_NFIXED];
    mt_fixed_counts(end_counters);
    os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_CYCLES],
        end_counters[MT_CORE_CYCLES] - start_counters[MT_CORE_CYCLES], relaxed);
#ifdef MT_CORE_INSTRS
    os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_INSTRS],
        end_counters[MT_CORE_INSTRS] - start_counters[MT_CORE_INSTRS], relaxed);
#endif /* defined(MT_CORE_INSTRS) */
    os_atomic_inc(&perfcontrol_callout_count[type], relaxed);
}
#endif /* MONOTONIC */

uint64_t
perfcontrol_callout_stat_avg(perfcontrol_callout_type_t type,
    perfcontrol_callout_stat_t stat)
{
    if (!perfcontrol_callout_stats_enabled) {
        return 0;
    }
    return os_atomic_load_wide(&perfcontrol_callout_stats[type][stat], relaxed) /
           os_atomic_load_wide(&perfcontrol_callout_count[type], relaxed);
}
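
/*
 * Context-switch callout: invoked on every switch to give the performance
 * controller both the legacy switch notification and the richer csw callout
 * with per-thread data and CPU counters.
 */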
void
machine_switch_perfcontrol_context(perfcontrol_event event,
    uint64_t timestamp,
    uint32_t flags,
    uint64_t new_thread_same_pri_latency,
    thread_t old,
    thread_t new)
{
    if (sched_perfcontrol_switch != sched_perfcontrol_switch_default) {
        perfcontrol_state_t old_perfcontrol_state = FIND_PERFCONTROL_STATE(old);
        perfcontrol_state_t new_perfcontrol_state = FIND_PERFCONTROL_STATE(new);
        sched_perfcontrol_switch(old_perfcontrol_state, new_perfcontrol_state);
    }

    if (sched_perfcontrol_csw != sched_perfcontrol_csw_default) {
        uint32_t cpu_id = (uint32_t)cpu_number();
        struct perfcontrol_cpu_counters cpu_counters;
        struct perfcontrol_thread_data offcore, oncore;
        machine_switch_populate_perfcontrol_thread_data(&offcore, old, 0);
        machine_switch_populate_perfcontrol_thread_data(&oncore, new,
            new_thread_same_pri_latency);
        machine_switch_populate_perfcontrol_cpu_counters(&cpu_counters);

#if MONOTONIC
        uint64_t counters[MT_CORE_NFIXED];
        bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
        sched_perfcontrol_csw(event, cpu_id, timestamp, flags,
            &offcore, &oncore, &cpu_counters, NULL);
#if MONOTONIC
        if (ctrs_enabled) {
            perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_CONTEXT);
        }
#endif /* MONOTONIC */

        old->machine.energy_estimate_nj += offcore.energy_estimate_nj;
        new->machine.energy_estimate_nj += oncore.energy_estimate_nj;
    }
}
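
/*
 * State-update callout: reports a scheduler event for a single thread to the
 * performance controller.
 */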
void
machine_switch_perfcontrol_state_update(perfcontrol_event event,
    uint64_t timestamp,
    uint32_t flags,
    thread_t thread)
{
    if (sched_perfcontrol_state_update == sched_perfcontrol_state_update_default) {
        return;
    }
    uint32_t cpu_id = (uint32_t)cpu_number();
    struct perfcontrol_thread_data data;
    machine_switch_populate_perfcontrol_thread_data(&data, thread, 0);

#if MONOTONIC
    uint64_t counters[MT_CORE_NFIXED];
    bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
    sched_perfcontrol_state_update(event, cpu_id, timestamp, flags,
        &data, NULL);
#if MONOTONIC
    if (ctrs_enabled) {
        perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_STATE_UPDATE);
    }
#endif /* MONOTONIC */

    thread->machine.energy_estimate_nj += data.energy_estimate_nj;
}
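
/*
 * Notify the performance controller that a thread is about to go on core,
 * including its QoS class, urgency, and observed scheduling latencies.
 */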
void
machine_thread_going_on_core(thread_t new_thread,
    thread_urgency_t urgency,
    uint64_t sched_latency,
    uint64_t same_pri_latency,
    uint64_t timestamp)
{
    if (sched_perfcontrol_oncore == sched_perfcontrol_oncore_default) {
        return;
    }
    struct going_on_core on_core;
    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(new_thread);

    on_core.thread_id = new_thread->thread_id;
    on_core.energy_estimate_nj = 0;
    on_core.qos_class = (uint16_t)proc_get_effective_thread_policy(new_thread, TASK_POLICY_QOS);
    on_core.urgency = (uint16_t)urgency;
    on_core.is_32_bit = thread_is_64bit_data(new_thread) ? FALSE : TRUE;
    on_core.is_kernel_thread = new_thread->task == kernel_task;
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg = thread_group_get(new_thread);
    on_core.thread_group_id = thread_group_get_id(tg);
    on_core.thread_group_data = thread_group_get_machine_data(tg);
#endif
    on_core.scheduling_latency = sched_latency;
    on_core.start_time = timestamp;
    on_core.scheduling_latency_at_same_basepri = same_pri_latency;

#if MONOTONIC
    uint64_t counters[MT_CORE_NFIXED];
    bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
    sched_perfcontrol_oncore(state, &on_core);
#if MONOTONIC
    if (ctrs_enabled) {
        perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_ON_CORE);
    }
#endif /* MONOTONIC */

    new_thread->machine.energy_estimate_nj += on_core.energy_estimate_nj;
}
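
/*
 * Notify the performance controller that a thread is coming off core, passing
 * the dispatch end time and whether the thread is terminating.
 */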
void
machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
    uint64_t last_dispatch, __unused boolean_t thread_runnable)
{
    if (sched_perfcontrol_offcore == sched_perfcontrol_offcore_default) {
        return;
    }
    struct going_off_core off_core;
    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(old_thread);

    off_core.thread_id = old_thread->thread_id;
    off_core.energy_estimate_nj = 0;
    off_core.end_time = last_dispatch;
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg = thread_group_get(old_thread);
    off_core.thread_group_id = thread_group_get_id(tg);
    off_core.thread_group_data = thread_group_get_machine_data(tg);
#endif

#if MONOTONIC
    uint64_t counters[MT_CORE_NFIXED];
    bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
    sched_perfcontrol_offcore(state, &off_core, thread_terminating);
#if MONOTONIC
    if (ctrs_enabled) {
        perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_OFF_CORE);
    }
#endif /* MONOTONIC */

    old_thread->machine.energy_estimate_nj += off_core.energy_estimate_nj;
}
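
/*
 * Thread-group lifecycle callouts: tell the performance controller when thread
 * groups are created, destroyed, or change flags.
 */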
#if CONFIG_THREAD_GROUPS
void
machine_thread_group_init(struct thread_group *tg)
{
    if (sched_perfcontrol_thread_group_init == sched_perfcontrol_thread_group_default) {
        return;
    }
    struct thread_group_data data;
    data.thread_group_id = thread_group_get_id(tg);
    data.thread_group_data = thread_group_get_machine_data(tg);
    data.thread_group_size = thread_group_machine_data_size();
    data.thread_group_flags = thread_group_get_flags(tg);
    sched_perfcontrol_thread_group_init(&data);
}

void
machine_thread_group_deinit(struct thread_group *tg)
{
    if (sched_perfcontrol_thread_group_deinit == sched_perfcontrol_thread_group_default) {
        return;
    }
    struct thread_group_data data;
    data.thread_group_id = thread_group_get_id(tg);
    data.thread_group_data = thread_group_get_machine_data(tg);
    data.thread_group_size = thread_group_machine_data_size();
    data.thread_group_flags = thread_group_get_flags(tg);
    sched_perfcontrol_thread_group_deinit(&data);
}

void
machine_thread_group_flags_update(struct thread_group *tg, uint32_t flags)
{
    if (sched_perfcontrol_thread_group_flags_update == sched_perfcontrol_thread_group_default) {
        return;
    }
    struct thread_group_data data;
    data.thread_group_id = thread_group_get_id(tg);
    data.thread_group_data = thread_group_get_machine_data(tg);
    data.thread_group_size = thread_group_machine_data_size();
    data.thread_group_flags = flags;
    sched_perfcontrol_thread_group_flags_update(&data);
}
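
/*
 * Blocking-relationship callouts: report which thread group a blocked thread
 * is waiting on so the performance controller can account for the dependency.
 */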
void
machine_thread_group_blocked(struct thread_group *blocked_tg,
    struct thread_group *blocking_tg,
    uint32_t flags,
    thread_t blocked_thread)
{
    if (sched_perfcontrol_thread_group_blocked == sched_perfcontrol_thread_group_blocked_default) {
        return;
    }

    spl_t s = splsched();

    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(blocked_thread);
    struct thread_group_data blocked_data;
    assert(blocked_tg != NULL);

    blocked_data.thread_group_id = thread_group_get_id(blocked_tg);
    blocked_data.thread_group_data = thread_group_get_machine_data(blocked_tg);
    blocked_data.thread_group_size = thread_group_machine_data_size();

    if (blocking_tg == NULL) {
        /*
         * For special cases such as the render server, the blocking TG is a
         * well-known TG. Only in that case should blocking_tg be NULL.
         */
        assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
        sched_perfcontrol_thread_group_blocked(&blocked_data, NULL, flags, state);
    } else {
        struct thread_group_data blocking_data;
        blocking_data.thread_group_id = thread_group_get_id(blocking_tg);
        blocking_data.thread_group_data = thread_group_get_machine_data(blocking_tg);
        blocking_data.thread_group_size = thread_group_machine_data_size();
        sched_perfcontrol_thread_group_blocked(&blocked_data, &blocking_data, flags, state);
    }
    KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) | DBG_FUNC_START,
        thread_tid(blocked_thread), thread_group_get_id(blocked_tg),
        blocking_tg ? thread_group_get_id(blocking_tg) : THREAD_GROUP_INVALID,
        flags);

    splx(s);
}

void
machine_thread_group_unblocked(struct thread_group *unblocked_tg,
    struct thread_group *unblocking_tg,
    uint32_t flags,
    thread_t unblocked_thread)
{
    if (sched_perfcontrol_thread_group_unblocked == sched_perfcontrol_thread_group_unblocked_default) {
        return;
    }

    spl_t s = splsched();

    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(unblocked_thread);
    struct thread_group_data unblocked_data;
    assert(unblocked_tg != NULL);

    unblocked_data.thread_group_id = thread_group_get_id(unblocked_tg);
    unblocked_data.thread_group_data = thread_group_get_machine_data(unblocked_tg);
    unblocked_data.thread_group_size = thread_group_machine_data_size();

    if (unblocking_tg == NULL) {
        /*
         * For special cases such as the render server, the unblocking TG is a
         * well-known TG. Only in that case should unblocking_tg be NULL.
         */
        assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
        sched_perfcontrol_thread_group_unblocked(&unblocked_data, NULL, flags, state);
    } else {
        struct thread_group_data unblocking_data;
        unblocking_data.thread_group_id = thread_group_get_id(unblocking_tg);
        unblocking_data.thread_group_data = thread_group_get_machine_data(unblocking_tg);
        unblocking_data.thread_group_size = thread_group_machine_data_size();
        sched_perfcontrol_thread_group_unblocked(&unblocked_data, &unblocking_data, flags, state);
    }
    KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) | DBG_FUNC_END,
        thread_tid(unblocked_thread), thread_group_get_id(unblocked_tg),
        unblocking_tg ? thread_group_get_id(unblocking_tg) : THREAD_GROUP_INVALID,
        flags);

    splx(s);
}
#endif /* CONFIG_THREAD_GROUPS */
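
/*
 * Report the maximum runnable (pending) latency observed per urgency band to
 * the performance controller.
 */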
void
machine_max_runnable_latency(uint64_t bg_max_latency,
    uint64_t default_max_latency,
    uint64_t realtime_max_latency)
{
    if (sched_perfcontrol_max_runnable_latency == sched_perfcontrol_max_runnable_latency_default) {
        return;
    }
    struct perfcontrol_max_runnable_latency latencies = {
        .max_scheduling_latencies = {
            [THREAD_URGENCY_NONE] = 0,
            [THREAD_URGENCY_BACKGROUND] = bg_max_latency,
            [THREAD_URGENCY_NORMAL] = default_max_latency,
            [THREAD_URGENCY_REAL_TIME] = realtime_max_latency
        }
    };

    sched_perfcontrol_max_runnable_latency(&latencies);
}
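
/*
 * Forward a thread's work-interval start/finish/deadline information to the
 * performance controller.
 */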
void
machine_work_interval_notify(thread_t thread,
    struct kern_work_interval_args *kwi_args)
{
    if (sched_perfcontrol_work_interval_notify == sched_perfcontrol_work_interval_notify_default) {
        return;
    }
    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(thread);
    struct perfcontrol_work_interval work_interval = {
        .thread_id = thread->thread_id,
        .qos_class = (uint16_t)proc_get_effective_thread_policy(thread, TASK_POLICY_QOS),
        .urgency = kwi_args->urgency,
        .flags = kwi_args->notify_flags,
        .work_interval_id = kwi_args->work_interval_id,
        .start = kwi_args->start,
        .finish = kwi_args->finish,
        .deadline = kwi_args->deadline,
        .next_start = kwi_args->next_start,
        .create_flags = kwi_args->create_flags,
    };
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg;
    tg = thread_group_get(thread);
    work_interval.thread_group_id = thread_group_get_id(tg);
    work_interval.thread_group_data = thread_group_get_machine_data(tg);
#endif
    sched_perfcontrol_work_interval_notify(state, &work_interval);
}
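
/*
 * Tell the performance controller that a previously requested deadline has
 * passed.
 */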
void
machine_perfcontrol_deadline_passed(uint64_t deadline)
{
    if (sched_perfcontrol_deadline_passed != sched_perfcontrol_deadline_passed_default) {
        sched_perfcontrol_deadline_passed(deadline);
    }
}

#if INTERRUPT_MASKED_DEBUG
/*
 * ml_spin_debug_reset()
 * Reset the timestamp on a thread that has been unscheduled
 * to avoid false alarms. Alarm will go off if interrupts are held
 * disabled for too long, starting from now.
 *
 * Call ml_get_timebase() directly to prevent extra overhead on newer
 * platforms that's enabled in DEVELOPMENT kernel configurations.
 */
void
ml_spin_debug_reset(thread_t thread)
{
    if (thread->machine.intmask_timestamp) {
        thread->machine.intmask_timestamp = ml_get_timebase();
    }
}

/*
 * ml_spin_debug_clear()
 * Clear the timestamp on a thread that has been unscheduled
 * to avoid false alarms.
 */
void
ml_spin_debug_clear(thread_t thread)
{
    thread->machine.intmask_timestamp = 0;
}

/*
 * ml_spin_debug_clear_self()
 * Clear the timestamp on the current thread to prevent false alarms.
 */
void
ml_spin_debug_clear_self(void)
{
    ml_spin_debug_clear(current_thread());
}

static inline void
__ml_check_interrupts_disabled_duration(thread_t thread, uint64_t timeout, bool is_int_handler)
{
    uint64_t start;
    uint64_t now;

    start = is_int_handler ? thread->machine.inthandler_timestamp : thread->machine.intmask_timestamp;

    now = ml_get_timebase();

    if ((now - start) > timeout * debug_cpu_performance_degradation_factor) {
        mach_timebase_info_data_t timebase;
        clock_timebase_info(&timebase);

#ifndef KASAN
        /*
         * Disable the actual panic for KASAN due to the overhead of KASAN itself; leave the rest of the
         * mechanism enabled so that KASAN can catch any bugs in the mechanism itself.
         */
        if (is_int_handler) {
            panic("Processing of an interrupt (type = %u, handler address = %p, vector = %p) took %llu nanoseconds (timeout = %llu ns)",
                thread->machine.int_type, (void *)thread->machine.int_handler_addr, (void *)thread->machine.int_vector,
                (((now - start) * timebase.numer) / timebase.denom),
                ((timeout * debug_cpu_performance_degradation_factor) * timebase.numer) / timebase.denom);
        } else {
            panic("Interrupts held disabled for %llu nanoseconds (timeout = %llu ns)",
                (((now - start) * timebase.numer) / timebase.denom),
                ((timeout * debug_cpu_performance_degradation_factor) * timebase.numer) / timebase.denom);
        }
#endif
    }
}

void
ml_check_interrupts_disabled_duration(thread_t thread)
{
    __ml_check_interrupts_disabled_duration(thread, interrupt_masked_timeout, false);
}

void
ml_check_stackshot_interrupt_disabled_duration(thread_t thread)
{
    /* Use MAX() to let the user bump the timeout further if needed */
    __ml_check_interrupts_disabled_duration(thread, MAX(stackshot_interrupt_masked_timeout, interrupt_masked_timeout), false);
}

void
ml_check_interrupt_handler_duration(thread_t thread)
{
    __ml_check_interrupts_disabled_duration(thread, interrupt_masked_timeout, true);
}

void
ml_irq_debug_start(uintptr_t handler, uintptr_t vector)
{
    INTERRUPT_MASKED_DEBUG_START(handler, DBG_INTR_TYPE_OTHER);
    current_thread()->machine.int_vector = (uintptr_t)VM_KERNEL_STRIP_PTR(vector);
}

void
ml_irq_debug_end(void)
{
    INTERRUPT_MASKED_DEBUG_END();
}
#endif // INTERRUPT_MASKED_DEBUG
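
/*
 * Enable or disable IRQ/FIQ delivery on the current CPU, returning whether
 * interrupts were previously enabled. On enable, pending urgent ASTs are
 * handled first and the interrupts-masked watchdog (if configured) is checked.
 */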
boolean_t
ml_set_interrupts_enabled(boolean_t enable)
{
    thread_t thread;
    uint64_t state;

#if __arm__
#define INTERRUPT_MASK PSR_IRQF
    state = __builtin_arm_rsr("cpsr");
#else
#define INTERRUPT_MASK DAIF_IRQF
    state = __builtin_arm_rsr("DAIF");
#endif
    if (enable && (state & INTERRUPT_MASK)) {
        assert(getCpuDatap()->cpu_int_state == NULL); // Make sure we're not enabling interrupts from primary interrupt context
#if INTERRUPT_MASKED_DEBUG
        if (interrupt_masked_debug) {
            // Interrupts are currently masked, we will enable them (after finishing this check)
            thread = current_thread();
            if (stackshot_active()) {
                ml_check_stackshot_interrupt_disabled_duration(thread);
            } else {
                ml_check_interrupts_disabled_duration(thread);
            }
            thread->machine.intmask_timestamp = 0;
        }
#endif // INTERRUPT_MASKED_DEBUG
        if (get_preemption_level() == 0) {
            thread = current_thread();
            while (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
#if __ARM_USER_PROTECT__
                uintptr_t up = arm_user_protect_begin(thread);
#endif
                ast_taken_kernel();
#if __ARM_USER_PROTECT__
                arm_user_protect_end(thread, up, FALSE);
#endif
            }
        }
#if __arm__
        __asm__ volatile ("cpsie if" ::: "memory"); // Enable IRQ FIQ
#else
        __builtin_arm_wsr("DAIFClr", DAIFSC_STANDARD_DISABLE);
#endif
    } else if (!enable && ((state & INTERRUPT_MASK) == 0)) {
#if __arm__
        __asm__ volatile ("cpsid if" ::: "memory"); // Mask IRQ FIQ
#else
        __builtin_arm_wsr("DAIFSet", DAIFSC_STANDARD_DISABLE);
#endif
#if INTERRUPT_MASKED_DEBUG
        if (interrupt_masked_debug) {
            // Interrupts were enabled, we just masked them
            current_thread()->machine.intmask_timestamp = ml_get_timebase();
        }
#endif
    }
    return (state & INTERRUPT_MASK) == 0;
}

boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)
{
    return ml_set_interrupts_enabled(enable);
}

/*
 * Routine:   ml_at_interrupt_context
 * Function:  Check if running at interrupt context
 */
boolean_t
ml_at_interrupt_context(void)
{
    /* Do not use a stack-based check here, as the top-level exception handler
     * is free to use some other stack besides the per-CPU interrupt stack.
     * Interrupts should always be disabled if we're at interrupt context.
     * Check that first, as we may be in a preemptible non-interrupt context, in
     * which case we could be migrated to a different CPU between obtaining
     * the per-cpu data pointer and loading cpu_int_state. We then might end
     * up checking the interrupt state of a different CPU, resulting in a false
     * positive. But if interrupts are disabled, we also know we cannot be
     * preempted. */
    return !ml_get_interrupts_enabled() && (getCpuDatap()->cpu_int_state != NULL);
}
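
/*
 * Return the number of bytes remaining on the current stack, using the
 * interrupt stack bounds if the stack pointer currently lies within them.
 */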
vm_offset_t
ml_stack_remaining(void)
{
    uintptr_t local = (uintptr_t)&local;
    vm_offset_t intstack_top_ptr;

    /* Since this is a stack-based check, we don't need to worry about
     * preemption as we do in ml_at_interrupt_context(). If we are preemptible,
     * then the sp should never be within any CPU's interrupt stack unless
     * something has gone horribly wrong. */
    intstack_top_ptr = getCpuDatap()->intstack_top;
    if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
        return local - (getCpuDatap()->intstack_top - INTSTACK_SIZE);
    } else {
        return local - current_thread()->kernel_stack;
    }
}

static boolean_t ml_quiescing = FALSE;

void
ml_set_is_quiescing(boolean_t quiescing)
{
    ml_quiescing = quiescing;
    os_atomic_thread_fence(release);
}

boolean_t
ml_is_quiescing(void)
{
    os_atomic_thread_fence(acquire);
    return ml_quiescing;
}

uint64_t
ml_get_booter_memory_size(void)
{
    uint64_t size;
    uint64_t roundsize = 512 * 1024 * 1024ULL;
    size = BootArgs->memSizeActual;
    if (!size) {
        size = BootArgs->memSize;
        if (size < (2 * roundsize)) {
            roundsize >>= 2;
        }
        size = (size + roundsize - 1) & ~(roundsize - 1);
    }

    size -= BootArgs->memSize;

    return size;
}

uint64_t
ml_get_abstime_offset(void)
{
    return rtclock_base_abstime;
}

uint64_t
ml_get_conttime_offset(void)
{
#if HIBERNATION && HAS_CONTINUOUS_HWCLOCK
    return hwclock_conttime_offset;
#elif HAS_CONTINUOUS_HWCLOCK
    return 0;
#else
    return rtclock_base_abstime + mach_absolutetime_asleep;
#endif
}

uint64_t
ml_get_time_since_reset(void)
{
#if HAS_CONTINUOUS_HWCLOCK
    if (wake_conttime == UINT64_MAX) {
        return UINT64_MAX;
    } else {
        return mach_continuous_time() - wake_conttime;
    }
#else
    /* The timebase resets across S2R, so just return the raw value. */
    return ml_get_hwclock();
#endif
}

void
ml_set_reset_time(__unused uint64_t wake_time)
{
#if HAS_CONTINUOUS_HWCLOCK
    wake_conttime = wake_time;
#endif
}

uint64_t
ml_get_conttime_wake_time(void)
{
#if HAS_CONTINUOUS_HWCLOCK
    /*
     * For now, we will reconstitute the timebase value from
     * cpu_timebase_init and use it as the wake time.
     */
    return wake_abstime - ml_get_abstime_offset();
#else /* HAS_CONTINUOUS_HWCLOCK */
    /* The wake time is simply our continuous time offset. */
    return ml_get_conttime_offset();
#endif /* HAS_CONTINUOUS_HWCLOCK */
}

/*
 * ml_snoop_thread_is_on_core(thread_t thread)
 * Check if the given thread is currently on core. This function does not take
 * locks, disable preemption, or otherwise guarantee synchronization. The
 * result should be considered advisory.
 */
bool
ml_snoop_thread_is_on_core(thread_t thread)
{
    unsigned int cur_cpu_num = 0;
    const unsigned int max_cpu_id = ml_get_max_cpu_number();

    for (cur_cpu_num = 0; cur_cpu_num <= max_cpu_id; cur_cpu_num++) {
        if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr) {
            if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr->cpu_active_thread == thread) {
                return true;
            }
        }
    }

    return false;
}

unsigned int
ml_early_cpu_max_number(void)
{
    assert(startup_phase >= STARTUP_SUB_TUNABLES);
    return ml_get_max_cpu_number();
}
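
/*
 * Publish the maximum CPU count and wake anyone blocked in ml_wait_max_cpus().
 */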
void
ml_set_max_cpus(unsigned int max_cpus __unused)
{
    lck_mtx_lock(&max_cpus_lock);
    if (max_cpus_initialized != MAX_CPUS_SET) {
        if (max_cpus_initialized == MAX_CPUS_WAIT) {
            thread_wakeup((event_t)&max_cpus_initialized);
        }
        max_cpus_initialized = MAX_CPUS_SET;
    }
    lck_mtx_unlock(&max_cpus_lock);
}

unsigned int
ml_wait_max_cpus(void)
{
    assert(lockdown_done);
    lck_mtx_lock(&max_cpus_lock);
    while (max_cpus_initialized != MAX_CPUS_SET) {
        max_cpus_initialized = MAX_CPUS_WAIT;
        lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
    }
    lck_mtx_unlock(&max_cpus_lock);
    return machine_info.max_cpus;
}

void
machine_conf(void)
{
    /*
     * This is known to be inaccurate. mem_size should always be capped at 2 GB
     */
    machine_info.memory_size = (uint32_t)mem_size;

    // rdar://problem/58285685: Userland expects _COMM_PAGE_LOGICAL_CPUS to report
    // (max_cpu_id+1) rather than a literal *count* of logical CPUs.
    unsigned int num_cpus = ml_get_topology_info()->max_cpu_id + 1;
    machine_info.max_cpus = num_cpus;
    machine_info.physical_cpu_max = num_cpus;
    machine_info.logical_cpu_max = num_cpus;
}

void
machine_init(void)
{
    debug_log_init();
    clock_config();
    is_clock_configured = TRUE;
    if (debug_enabled) {