/*
 * Copyright (c) 2011-2018 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * This file manages the timers used for on-CPU samples and PET.
 *
 * Each timer configured by a tool is represented by a kptimer structure.
 * The timer calls present in each structure are used to schedule CPU-local
 * timers. As each timer fires, that CPU samples itself and schedules another
 * timer to fire at the next deadline. The first timer to fire across all CPUs
 * determines that deadline. This causes the timers to fire at a consistent
 * cadence.
 *
 * Traditional PET uses a timer call to wake up its sampling thread and take
 * on-CPU samples.
 *
 * Synchronization for start and stop is provided by the ktrace subsystem lock.
 * Global state is stored in a single struct, to ease debugging.
 */

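/*
 * The rough flow is that a tool configures timers with the accessors at the
 * bottom of this file and then starts them. The names below are the real
 * accessors defined here, but the surrounding sequence is an illustrative
 * sketch, not lifted from an actual caller; `period_abs` and `actionid` are
 * placeholders:
 *
 *     kptimer_setup();
 *     kptimer_set_count(1);
 *     kptimer_set_period(0, period_abs);
 *     kptimer_set_action(0, actionid);
 *     kptimer_start();
 */
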
#include <mach/mach_types.h>
#include <kern/cpu_data.h> /* current_thread() */
#include <kern/kalloc.h>
#include <kern/timer_queue.h>
#include <libkern/section_keywords.h>
#include <stdatomic.h>
#include <sys/errno.h>

#include <sys/ktrace.h>

#include <machine/machine_routines.h>
#if defined(__x86_64__)
#include <i386/mp.h>
#endif /* defined(__x86_64__) */

#include <kperf/kperf.h>
#include <kperf/buffer.h>
#include <kperf/context.h>
#include <kperf/action.h>
#include <kperf/kptimer.h>
#include <kperf/pet.h>
#include <kperf/sample.h>

#define KPTIMER_PET_INACTIVE (999)
#define KPTIMER_MAX          (8)

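/*
 * All timer state hangs off the single `kptimer` global below, to ease
 * debugging; the `g_setup`, `g_pet_active`, and `g_started` flags gate
 * one-time setup, PET timer activity, and whether the timers are started.
 */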
struct kptimer {
    uint32_t kt_actionid;
    uint64_t kt_period_abs;
    /*
     * The `kt_cur_deadline` field represents when the timer should next fire.
     * It's used to synchronize between timers firing on each CPU. In the timer
     * handler, each CPU will take the `kt_lock` and see if the
     * `kt_cur_deadline` still needs to be updated for the timer fire. If so,
     * it updates it and logs the timer fire event under the lock.
     */
    lck_spin_t kt_lock;
    uint64_t kt_cur_deadline;

#if DEVELOPMENT || DEBUG
    /*
     * To be set by the timer leader as a debugging aid for timeouts, if kperf
     * happens to be on-CPU when they occur.
     */
    uint64_t kt_fire_time;
#endif /* DEVELOPMENT || DEBUG */
};

static struct {
    struct kptimer *g_timers;
    uint64_t *g_cpu_deadlines;
    unsigned int g_ntimers;
    unsigned int g_pet_timerid;

    bool g_setup:1;
    bool g_pet_active:1;
    bool g_started:1;

    struct timer_call g_pet_timer;
} kptimer = {
    .g_pet_timerid = KPTIMER_PET_INACTIVE,
};

SECURITY_READ_ONLY_LATE(static uint64_t) kptimer_minperiods_mtu[KTPL_MAX];

/*
 * Enforce a minimum timer period to prevent interrupt storms.
 */
const uint64_t kptimer_minperiods_ns[KTPL_MAX] = {
#if defined(__x86_64__)
    [KTPL_FG] = 20 * NSEC_PER_USEC, /* The minimum timer period in xnu, period. */
    [KTPL_BG] = 1 * NSEC_PER_MSEC,
    [KTPL_FG_PET] = 2 * NSEC_PER_MSEC,
    [KTPL_BG_PET] = 5 * NSEC_PER_MSEC,
#elif defined(__arm64__)
    [KTPL_FG] = 50 * NSEC_PER_USEC,
    [KTPL_BG] = 1 * NSEC_PER_MSEC,
    [KTPL_FG_PET] = 2 * NSEC_PER_MSEC,
    [KTPL_BG_PET] = 10 * NSEC_PER_MSEC,
#elif defined(__arm__)
    [KTPL_FG] = 100 * NSEC_PER_USEC,
    [KTPL_BG] = 10 * NSEC_PER_MSEC,
    [KTPL_FG_PET] = 2 * NSEC_PER_MSEC,
    [KTPL_BG_PET] = 50 * NSEC_PER_MSEC,
#else
#error unexpected architecture
#endif /* architectures */
};

static void kptimer_pet_handler(void * __unused param1, void * __unused param2);
static void kptimer_stop_curcpu(processor_t processor);

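/*
 * Convert the minimum-period floors from nanoseconds to Mach time units once,
 * up front, so enforcement in kptimer_min_period_abs is a simple lookup.
 */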
void
kptimer_init(void)
{
    for (int i = 0; i < KTPL_MAX; i++) {
        nanoseconds_to_absolutetime(kptimer_minperiods_ns[i],
            &kptimer_minperiods_mtu[i]);
    }
}

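/*
 * Per-CPU deadlines are kept in a flat array, indexed by
 * `(cpuid * KPTIMER_MAX) + timerid`; a slot is parked by setting its deadline
 * to `EndOfAllTime`.
 */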
static void
kptimer_set_cpu_deadline(int cpuid, int timerid, uint64_t deadline)
{
    kptimer.g_cpu_deadlines[(cpuid * KPTIMER_MAX) + timerid] =
        deadline;
}

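/*
 * One-time allocation of the timer and per-CPU deadline storage, guarded by
 * `g_setup` so repeated calls are harmless.
 */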
void
kptimer_setup(void)
{
    if (kptimer.g_setup) {
        return;
    }
    static lck_grp_t kptimer_lock_grp;
    lck_grp_init(&kptimer_lock_grp, "kptimer", LCK_GRP_ATTR_NULL);

    const size_t timers_size = KPTIMER_MAX * sizeof(struct kptimer);
    kptimer.g_timers = kalloc_tag(timers_size, VM_KERN_MEMORY_DIAG);
    assert(kptimer.g_timers != NULL);
    memset(kptimer.g_timers, 0, timers_size);
    for (int i = 0; i < KPTIMER_MAX; i++) {
        lck_spin_init(&kptimer.g_timers[i].kt_lock, &kptimer_lock_grp,
            LCK_ATTR_NULL);
    }

    const size_t deadlines_size = machine_info.logical_cpu_max * KPTIMER_MAX *
        sizeof(kptimer.g_cpu_deadlines[0]);
    kptimer.g_cpu_deadlines = kalloc_tag(deadlines_size, VM_KERN_MEMORY_DIAG);
    assert(kptimer.g_cpu_deadlines != NULL);
    memset(kptimer.g_cpu_deadlines, 0, deadlines_size);
    for (int i = 0; i < KPTIMER_MAX; i++) {
        for (int j = 0; j < machine_info.logical_cpu_max; j++) {
            kptimer_set_cpu_deadline(j, i, EndOfAllTime);
        }
    }

    timer_call_setup(&kptimer.g_pet_timer, kptimer_pet_handler, NULL);

    kptimer.g_setup = true;
}

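/*
 * Return all timers to their inactive state: no period, no action, and all
 * per-CPU deadlines parked.
 */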
void
kptimer_reset(void)
{
    kptimer_stop();
    kptimer_set_pet_timerid(KPTIMER_PET_INACTIVE);

    for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
        kptimer.g_timers[i].kt_period_abs = 0;
        kptimer.g_timers[i].kt_actionid = 0;
        for (int j = 0; j < machine_info.logical_cpu_max; j++) {
            kptimer_set_cpu_deadline(j, i, EndOfAllTime);
        }
    }
}

#pragma mark - deadline management

static uint64_t
kptimer_get_cpu_deadline(int cpuid, int timerid)
{
    return kptimer.g_cpu_deadlines[(cpuid * KPTIMER_MAX) + timerid];
}

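/*
 * Sample the current CPU from interrupt context, using the interrupt-safe
 * sample buffer and logging begin/end events around the sample.
 */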
static void
kptimer_sample_curcpu(unsigned int actionid, unsigned int timerid,
    uint32_t flags)
{
    struct kperf_sample *intbuf = kperf_intr_sample_buffer();
#if DEVELOPMENT || DEBUG
    intbuf->sample_time = mach_absolute_time();
#endif /* DEVELOPMENT || DEBUG */

    BUF_DATA(PERF_TM_HNDLR | DBG_FUNC_START);

    thread_t thread = current_thread();
    task_t task = get_threadtask(thread);
    struct kperf_context ctx = {
        .cur_thread = thread,
        .cur_task = task,
        .cur_pid = task_pid(task),
        .trigger_type = TRIGGER_TYPE_TIMER,
        .trigger_id = timerid,
    };

    (void)kperf_sample(intbuf, &ctx, actionid,
        SAMPLE_FLAG_PEND_USER | flags);

    BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END);
}

static void
kptimer_lock(struct kptimer *timer)
{
    lck_spin_lock(&timer->kt_lock);
}

static void
kptimer_unlock(struct kptimer *timer)
{
    lck_spin_unlock(&timer->kt_lock);
}

/*
 * If the deadline expired in the past, find the next deadline to program,
 * locked into the cadence provided by the period.
 */
static inline uint64_t
dead_reckon_deadline(uint64_t now, uint64_t deadline, uint64_t period)
{
    if (deadline < now) {
        uint64_t time_since = now - deadline;
        uint64_t extra_time = period - (time_since % period);
        return now + extra_time;
    }
    return deadline;
}

static uint64_t
kptimer_fire(struct kptimer *timer, unsigned int timerid,
    uint64_t deadline, int __unused cpuid, uint64_t now)
{
    bool first = false;
    uint64_t next_deadline = deadline + timer->kt_period_abs;

    /*
     * It's not straightforward to replace this lock with a compare-exchange,
     * since the PERF_TM_FIRE event must be emitted *before* any subsequent
     * PERF_TM_HNDLR events, so tools can understand the handlers are responding
     * to this timer fire.
     */
    kptimer_lock(timer);
    if (timer->kt_cur_deadline < next_deadline) {
        first = true;
        next_deadline = dead_reckon_deadline(now, next_deadline,
            timer->kt_period_abs);
        timer->kt_cur_deadline = next_deadline;
        BUF_DATA(PERF_TM_FIRE, timerid, timerid == kptimer.g_pet_timerid,
            timer->kt_period_abs, timer->kt_actionid);
#if DEVELOPMENT || DEBUG
        /*
         * Debugging aid to see the last time this timer fired.
         */
        timer->kt_fire_time = mach_absolute_time();
#endif /* DEVELOPMENT || DEBUG */
        if (timerid == kptimer.g_pet_timerid && kppet_get_lightweight_pet()) {
            os_atomic_inc(&kppet_gencount, relaxed);
        }
    } else {
        /*
         * In case this CPU has missed several timer fires, get it back on track
         * by synchronizing with the latest timer fire.
         */
        next_deadline = timer->kt_cur_deadline;
    }
    kptimer_unlock(timer);

    if (!first && !kperf_action_has_non_system(timer->kt_actionid)) {
        /*
         * The first timer to fire will sample the system, so there's
         * no need to run other timers if those are the only samplers
         * for this action.
         */
        return next_deadline;
    }

    kptimer_sample_curcpu(timer->kt_actionid, timerid,
        first ? SAMPLE_FLAG_SYSTEM : 0);

    return next_deadline;
}

/*
 * Determine which of the timers fired.
 */
void
kptimer_expire(processor_t processor, int cpuid, uint64_t now)
{
    uint64_t min_deadline = UINT64_MAX;

    if (kperf_status != KPERF_SAMPLING_ON) {
        if (kperf_status == KPERF_SAMPLING_SHUTDOWN) {
            kptimer_stop_curcpu(processor);
            return;
        } else if (kperf_status == KPERF_SAMPLING_OFF) {
            panic("kperf: timer fired at %llu, but sampling is disabled", now);
        } else {
            panic("kperf: unknown sampling state 0x%x", kperf_status);
        }
    }

    for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
        struct kptimer *timer = &kptimer.g_timers[i];
        if (timer->kt_period_abs == 0) {
            continue;
        }

        uint64_t cpudeadline = kptimer_get_cpu_deadline(cpuid, i);
        if (now > cpudeadline) {
            uint64_t deadline = kptimer_fire(timer, i, cpudeadline, cpuid, now);
            if (deadline == 0) {
                kptimer_set_cpu_deadline(cpuid, i, EndOfAllTime);
            } else {
                kptimer_set_cpu_deadline(cpuid, i, deadline);
                if (deadline < min_deadline) {
                    min_deadline = deadline;
                }
            }
        }
    }

    if (min_deadline < UINT64_MAX) {
        running_timer_enter(processor, RUNNING_TIMER_KPERF, NULL,
            min_deadline, mach_absolute_time());
    }
}

#pragma mark - start/stop

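/*
 * Run `fn` on every CPU: x86_64 uses mp_cpus_call, while other platforms use
 * a cross-call with an acknowledgement counter that remote handlers decrement
 * via kptimer_broadcast_ack (a no-op on x86_64, where mp_cpus_call handles
 * synchronization itself).
 */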
static void
kptimer_broadcast(void (*fn)(void *))
{
    ktrace_assert_lock_held();

#if defined(__x86_64__)
    (void)mp_cpus_call(CPUMASK_ALL, ASYNC, fn, NULL);
#else /* defined(__x86_64__) */
    _Atomic uint32_t xcsync = 0;
    cpu_broadcast_xcall((uint32_t *)&xcsync, TRUE /* include self */, fn,
        &xcsync);
#endif /* !defined(__x86_64__) */
}

static void
kptimer_broadcast_ack(void *arg)
{
#if defined(__x86_64__)
#pragma unused(arg)
#else /* defined(__x86_64__) */
    _Atomic uint32_t *xcsync = arg;
    int pending = os_atomic_dec(xcsync, relaxed);
    if (pending == 0) {
        thread_wakeup(xcsync);
    }
#endif /* !defined(__x86_64__) */
}

static void
kptimer_sample_pet_remote(void * __unused arg)
{
    if (!kperf_is_sampling()) {
        return;
    }
    struct kptimer *timer = &kptimer.g_timers[kptimer.g_pet_timerid];
    kptimer_sample_curcpu(timer->kt_actionid, kptimer.g_pet_timerid, 0);
}

#if !defined(__x86_64__)

#include <arm/cpu_internal.h>

void kperf_signal_handler(void);
void
kperf_signal_handler(void)
{
    kptimer_sample_pet_remote(NULL);
}

#endif /* !defined(__x86_64__) */

static void
kptimer_broadcast_pet(void)
{
#if defined(__x86_64__)
    (void)mp_cpus_call(CPUMASK_OTHERS, NOSYNC, kptimer_sample_pet_remote,
        NULL);
#else /* defined(__x86_64__) */
    int curcpu = cpu_number();
    for (int i = 0; i < machine_info.logical_cpu_max; i++) {
        if (i != curcpu) {
            cpu_signal(cpu_datap(i), SIGPkppet, NULL, NULL);
        }
    }
#endif /* !defined(__x86_64__) */
}

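/*
 * Handler for the PET timer call: IPI the other CPUs to sample themselves,
 * wake the PET sampling thread, then sample this CPU last with system
 * measurements included.
 */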
static void
kptimer_pet_handler(void * __unused param1, void * __unused param2)
{
    if (!kptimer.g_pet_active) {
        return;
    }

    struct kptimer *timer = &kptimer.g_timers[kptimer.g_pet_timerid];

    BUF_DATA(PERF_TM_FIRE, kptimer.g_pet_timerid, 1, timer->kt_period_abs,
        timer->kt_actionid);

    /*
     * To get the on-CPU samples as close to this timer fire as possible, first
     * broadcast to them to sample themselves.
     */
    kptimer_broadcast_pet();

    /*
     * Wake up the PET thread afterwards so it's not inadvertently sampled (it's
     * a high-priority kernel thread). If the scheduler needs to IPI to run it,
     * that IPI will be handled after the IPIs issued during the broadcast.
     */
    kppet_wake_thread();

    /*
     * Finally, sample this CPU, whose stacks and state have been preserved
     * while running this handler. Make sure to include system measurements.
     */
    kptimer_sample_curcpu(timer->kt_actionid, kptimer.g_pet_timerid,
        SAMPLE_FLAG_SYSTEM);

    BUF_INFO(PERF_TM_FIRE | DBG_FUNC_END);

    /*
     * The PET thread will re-arm the timer when it's done.
     */
}

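/*
 * Re-arm the PET timer after a sampling pass that took `sampledur_abs`. The
 * sample duration is deducted from the period (then clamped to the minimum
 * period floor) so long passes don't stretch the effective sampling interval;
 * e.g., with an illustrative 10ms period and a 2ms pass, the next fire lands
 * 8ms out.
 */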
void
kptimer_pet_enter(uint64_t sampledur_abs)
{
    if (!kperf_is_sampling()) {
        return;
    }

    uint64_t period_abs = kptimer.g_timers[kptimer.g_pet_timerid].kt_period_abs;
    uint64_t orig_period_abs = period_abs;

    if (period_abs > sampledur_abs) {
        period_abs -= sampledur_abs;
    }
    period_abs = MAX(kptimer_min_period_abs(true), period_abs);
    uint64_t deadline_abs = mach_absolute_time() + period_abs;

    BUF_INFO(PERF_PET_SCHED, orig_period_abs, period_abs, sampledur_abs,
        deadline_abs);

    timer_call_enter(&kptimer.g_pet_timer, deadline_abs, TIMER_CALL_SYS_CRITICAL);
}

static uint64_t
kptimer_earliest_deadline(processor_t processor, uint64_t now)
{
    uint64_t min_deadline = UINT64_MAX;
    for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
        struct kptimer *timer = &kptimer.g_timers[i];
        uint64_t cur_deadline = timer->kt_cur_deadline;
        if (cur_deadline == 0) {
            continue;
        }
        cur_deadline = dead_reckon_deadline(now, cur_deadline,
            timer->kt_period_abs);
        kptimer_set_cpu_deadline(processor->cpu_id, i, cur_deadline);
        if (cur_deadline < min_deadline) {
            min_deadline = cur_deadline;
        }
    }
    return min_deadline;
}

void kptimer_running_setup(processor_t processor, uint64_t now);
void
kptimer_running_setup(processor_t processor, uint64_t now)
{
    uint64_t deadline = kptimer_earliest_deadline(processor, now);
    if (deadline < UINT64_MAX) {
        running_timer_setup(processor, RUNNING_TIMER_KPERF, NULL, deadline,
            now);
    }
}

static void
kptimer_start_remote(void *arg)
{
    processor_t processor = current_processor();
    uint64_t now = mach_absolute_time();
    uint64_t deadline = kptimer_earliest_deadline(processor, now);
    if (deadline < UINT64_MAX) {
        running_timer_enter(processor, RUNNING_TIMER_KPERF, NULL, deadline,
            now);
    }
    kptimer_broadcast_ack(arg);
}

static void
kptimer_stop_curcpu(processor_t processor)
{
    for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
        kptimer_set_cpu_deadline(processor->cpu_id, i, EndOfAllTime);
    }
    running_timer_cancel(processor, RUNNING_TIMER_KPERF);
}

static void
kptimer_stop_remote(void * __unused arg)
{
    assert(ml_get_interrupts_enabled() == FALSE);
    kptimer_stop_curcpu(current_processor());
    kptimer_broadcast_ack(arg);
}

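/*
 * Activate any configured timers, setting their initial deadlines one period
 * out and broadcasting to each CPU to program its local running timer. Full
 * PET is driven by its own timer call rather than per-CPU deadlines.
 */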
void
kptimer_start(void)
{
    ktrace_assert_lock_held();

    if (kptimer.g_started) {
        return;
    }

    uint64_t now = mach_absolute_time();
    unsigned int ntimers_active = 0;
    kptimer.g_started = true;
    for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
        struct kptimer *timer = &kptimer.g_timers[i];
        if (timer->kt_period_abs == 0 || timer->kt_actionid == 0) {
            /*
             * No period or action means the timer is inactive.
             */
            continue;
        } else if (!kppet_get_lightweight_pet() &&
            i == kptimer.g_pet_timerid) {
            kptimer.g_pet_active = true;
            timer_call_enter(&kptimer.g_pet_timer, now + timer->kt_period_abs,
                TIMER_CALL_SYS_CRITICAL);
        } else {
            timer->kt_cur_deadline = now + timer->kt_period_abs;
            ntimers_active++;
        }
    }
    if (ntimers_active > 0) {
        kptimer_broadcast(kptimer_start_remote);
    }
}

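/*
 * Quiesce all timer activity: cancel the PET timer call, broadcast to every
 * CPU to cancel its running timer, and clear each timer's shared deadline.
 */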
void
kptimer_stop(void)
{
    ktrace_assert_lock_held();

    if (!kptimer.g_started) {
        return;
    }

    int intrs_en = ml_set_interrupts_enabled(FALSE);

    if (kptimer.g_pet_active) {
        kptimer.g_pet_active = false;
        timer_call_cancel(&kptimer.g_pet_timer);
    }
    kptimer.g_started = false;
    kptimer_broadcast(kptimer_stop_remote);
    for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
        kptimer.g_timers[i].kt_cur_deadline = 0;
    }

    ml_set_interrupts_enabled(intrs_en);
}

#pragma mark - accessors

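/*
 * The accessors below return errno values: EINVAL for an out-of-range timer
 * ID and EBUSY for attempts to reconfigure while the timers are started.
 */
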
int
kptimer_get_period(unsigned int timerid, uint64_t *period_abs)
{
    if (timerid >= kptimer.g_ntimers) {
        return EINVAL;
    }
    *period_abs = kptimer.g_timers[timerid].kt_period_abs;
    return 0;
}

int
kptimer_set_period(unsigned int timerid, uint64_t period_abs)
{
    if (timerid >= kptimer.g_ntimers) {
        return EINVAL;
    }
    if (kptimer.g_started) {
        return EBUSY;
    }

    bool pet = kptimer.g_pet_timerid == timerid;
    uint64_t min_period = kptimer_min_period_abs(pet);
    if (period_abs != 0 && period_abs < min_period) {
        period_abs = min_period;
    }
    if (pet && !kppet_get_lightweight_pet()) {
        kppet_config(kptimer.g_timers[timerid].kt_actionid);
    }

    kptimer.g_timers[timerid].kt_period_abs = period_abs;
    return 0;
}

int
kptimer_get_action(unsigned int timerid, unsigned int *actionid)
{
    if (timerid >= kptimer.g_ntimers) {
        return EINVAL;
    }
    *actionid = kptimer.g_timers[timerid].kt_actionid;
    return 0;
}

int
kptimer_set_action(unsigned int timerid, unsigned int actionid)
{
    if (timerid >= kptimer.g_ntimers) {
        return EINVAL;
    }
    if (kptimer.g_started) {
        return EBUSY;
    }

    kptimer.g_timers[timerid].kt_actionid = actionid;
    if (kptimer.g_pet_timerid == timerid && !kppet_get_lightweight_pet()) {
        kppet_config(actionid);
    }
    return 0;
}

unsigned int
kptimer_get_count(void)
{
    return kptimer.g_ntimers;
}

int
kptimer_set_count(unsigned int count)
{
    if (kptimer.g_started) {
        return EBUSY;
    }
    if (count > KPTIMER_MAX) {
        return EINVAL;
    }
    kptimer.g_ntimers = count;
    return 0;
}

uint64_t
kptimer_min_period_abs(bool pet)
{
    enum kptimer_period_limit limit = 0;
    if (ktrace_background_active()) {
        limit = pet ? KTPL_BG_PET : KTPL_BG;
    } else {
        limit = pet ? KTPL_FG_PET : KTPL_FG;
    }
    return kptimer_minperiods_mtu[limit];
}

uint32_t
kptimer_get_pet_timerid(void)
{
    return kptimer.g_pet_timerid;
}

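/*
 * Designate which timer drives PET. An out-of-range ID configures PET with
 * action 0, which is how kptimer_reset (passing KPTIMER_PET_INACTIVE)
 * deactivates it; otherwise the PET action is reconfigured and the timer's
 * period is clamped to the PET minimum-period floor.
 */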
int
kptimer_set_pet_timerid(uint32_t petid)
{
    if (kptimer.g_started) {
        return EBUSY;
    }
    if (petid >= kptimer.g_ntimers) {
        kppet_config(0);
    } else {
        kppet_config(kptimer.g_timers[petid].kt_actionid);
        uint64_t period_abs = MAX(kptimer_min_period_abs(true),
            kptimer.g_timers[petid].kt_period_abs);
        kptimer.g_timers[petid].kt_period_abs = period_abs;
    }

    kptimer.g_pet_timerid = petid;

    return 0;
}