#include <mach/mach_types.h>
#include <kern/cpu_data.h> /* current_thread() */
#include <kern/kalloc.h>
+#include <stdatomic.h>
#include <sys/errno.h>
#include <sys/vm.h>
#include <sys/ktrace.h>
static unsigned int pet_timer_id = 999;
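+
+/*
+ * A timer's action_state packs the action ID to run into its low byte and an
+ * "active" flag into bit 8, so the timer handler and kperf_timer_stop() can
+ * coordinate through a single atomic word.
+ */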
+#define KPERF_TMR_ACTION_MASK (0xff)
+#define KPERF_TMR_ACTION(action_state) ((action_state) & KPERF_TMR_ACTION_MASK)
+#define KPERF_TMR_ACTIVE (0x100)
+
/* maximum number of timers we can construct */
#define TIMER_MAX (16)
-#if defined(__x86_64__)
-
-#define MIN_PERIOD_NS (20 * NSEC_PER_USEC)
-#define MIN_PERIOD_BG_NS (10 * NSEC_PER_MSEC)
-#define MIN_PERIOD_PET_NS (2 * NSEC_PER_MSEC)
-#define MIN_PERIOD_PET_BG_NS (10 * NSEC_PER_MSEC)
-
-#else /* defined(__x86_64__) */
-#error "unsupported architecture"
-#endif /* defined(__x86_64__) */
-
static uint64_t min_period_abstime;
static uint64_t min_period_bg_abstime;
static uint64_t min_period_pet_abstime;
timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
}
-void
-kperf_ipi_handler(void *param)
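+/*
+ * Sample the current CPU on behalf of `timer`.  `system_sample` adds
+ * SAMPLE_FLAG_SYSTEM to the sample flags and `only_system` adds
+ * SAMPLE_FLAG_ONLY_SYSTEM, restricting the sample to system-wide samplers.
+ */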
+static void
+kperf_sample_cpu(struct kperf_timer *timer, bool system_sample,
+ bool only_system)
{
- struct kperf_context ctx;
- struct kperf_timer *timer = param;
-
assert(timer != NULL);
/* Always cut a tracepoint to show a sample event occurred */
int ncpu = cpu_number();
struct kperf_sample *intbuf = kperf_intr_sample_buffer();
+#if DEVELOPMENT || DEBUG
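+	/* note when this sample was taken, for debugging */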
+ intbuf->sample_time = mach_absolute_time();
+#endif /* DEVELOPMENT || DEBUG */
/* On a timer, we can see the "real" current thread */
- ctx.cur_thread = current_thread();
- ctx.cur_pid = task_pid(get_threadtask(ctx.cur_thread));
-
- /* who fired */
- ctx.trigger_type = TRIGGER_TYPE_TIMER;
- ctx.trigger_id = (unsigned int)(timer - kperf_timerv);
+ thread_t thread = current_thread();
+ task_t task = get_threadtask(thread);
+ struct kperf_context ctx = {
+ .cur_thread = thread,
+ .cur_task = task,
+ .cur_pid = task_pid(task),
+ .trigger_type = TRIGGER_TYPE_TIMER,
+ .trigger_id = (unsigned int)(timer - kperf_timerv),
+ };
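+	/* for PET, remember which thread ID was on this CPU for this sample */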
if (ctx.trigger_id == pet_timer_id && ncpu < machine_info.logical_cpu_max) {
- kperf_thread_on_cpus[ncpu] = ctx.cur_thread;
+ kperf_tid_on_cpus[ncpu] = thread_tid(ctx.cur_thread);
}
/* make sure sampling is on */
}
/* call the action -- kernel-only from interrupt, pend user */
- int r = kperf_sample(intbuf, &ctx, timer->actionid, SAMPLE_FLAG_PEND_USER);
+ int r = kperf_sample(intbuf, &ctx, timer->actionid,
+ SAMPLE_FLAG_PEND_USER | (system_sample ? SAMPLE_FLAG_SYSTEM : 0) |
+ (only_system ? SAMPLE_FLAG_ONLY_SYSTEM : 0));
/* end tracepoint is informational */
BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, r);
-#if defined(__x86_64__)
- (void)atomic_bit_clear(&(timer->pending_cpus), ncpu, __ATOMIC_RELAXED);
-#endif /* defined(__x86_64__) */
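+	/* this CPU has handled its pending sample for this timer */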
+ (void)atomic_fetch_and_explicit(&timer->pending_cpus,
+ ~(UINT64_C(1) << ncpu), memory_order_relaxed);
+}
+
+void
+kperf_ipi_handler(void *param)
+{
+ kperf_sample_cpu((struct kperf_timer *)param, false, false);
}
static void
struct kperf_timer *timer = param0;
unsigned int ntimer = (unsigned int)(timer - kperf_timerv);
unsigned int ncpus = machine_info.logical_cpu_max;
+ bool system_only_self = true;
+
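+	/*
+	 * Mark the timer active while reading which action it should run, so
+	 * kperf_timer_stop() can tell this handler is still in its critical
+	 * section.
+	 */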
+ uint32_t action_state = atomic_fetch_or(&timer->action_state,
+ KPERF_TMR_ACTIVE);
+
+ uint32_t actionid = KPERF_TMR_ACTION(action_state);
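+	/* no action is configured for this timer, so there's nothing to sample */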
+ if (actionid == 0) {
+ goto deactivate;
+ }
- timer->active = 1;
+#if DEVELOPMENT || DEBUG
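+	/* note when this timer fired, for debugging */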
+ timer->fire_time = mach_absolute_time();
+#endif /* DEVELOPMENT || DEBUG */
/* along the lines of do not ipi if we are all shutting down */
if (kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN) {
}
BUF_DATA(PERF_TM_FIRE, ntimer, ntimer == pet_timer_id, timer->period,
- timer->actionid);
+ actionid);
if (ntimer == pet_timer_id) {
kperf_pet_fire_before();
/* clean-up the thread-on-CPUs cache */
- bzero(kperf_thread_on_cpus, ncpus * sizeof(*kperf_thread_on_cpus));
+ bzero(kperf_tid_on_cpus, ncpus * sizeof(*kperf_tid_on_cpus));
}
- /* ping all CPUs */
- kperf_mp_broadcast_running(timer);
+ /*
+ * IPI other cores only if the action has non-system samplers.
+ */
+ if (kperf_action_has_non_system(actionid)) {
+ /*
+ * If the core that's handling the timer is not scheduling
+ * threads, only run system samplers.
+ */
+ system_only_self = kperf_mp_broadcast_other_running(timer);
+ }
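+	/* take this CPU's sample in-line, including the system-wide samplers */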
+ kperf_sample_cpu(timer, true, system_only_self);
/* release the pet thread? */
if (ntimer == pet_timer_id) {
kperf_pet_fire_after();
} else {
/*
- * FIXME: Get the current time from elsewhere. The next
- * timer's period now includes the time taken to reach this
- * point. This causes a bias towards longer sampling periods
- * than requested.
- */
+ * FIXME: Get the current time from elsewhere. The next
+ * timer's period now includes the time taken to reach this
+ * point. This causes a bias towards longer sampling periods
+ * than requested.
+ */
kperf_timer_schedule(timer, mach_absolute_time());
}
deactivate:
- timer->active = 0;
+ atomic_fetch_and(&timer->action_state, ~KPERF_TMR_ACTIVE);
}
/* program the timer from the PET thread */
BUF_INFO(PERF_PET_SCHED, timer->period, period, elapsed_ticks, deadline);
/* re-schedule the timer, making sure we don't apply slop */
- timer_call_enter(&(timer->tcall), deadline, TIMER_CALL_SYS_CRITICAL);
+ timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
return;
}
uint64_t now = mach_absolute_time();
for (unsigned int i = 0; i < kperf_timerc; i++) {
- if (kperf_timerv[i].period == 0) {
+ struct kperf_timer *timer = &kperf_timerv[i];
+ if (timer->period == 0) {
continue;
}
- kperf_timer_schedule(&(kperf_timerv[i]), now);
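+		/* publish the timer's action so its handler will run it when it fires */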
+ atomic_store(&timer->action_state,
+ timer->actionid & KPERF_TMR_ACTION_MASK);
+ kperf_timer_schedule(timer, now);
}
}
void
kperf_timer_stop(void)
{
+ /*
+ * Determine which timers are running and store them in a bitset, while
+ * cancelling their timer call.
+ */
+ uint64_t running_timers = 0;
for (unsigned int i = 0; i < kperf_timerc; i++) {
- if (kperf_timerv[i].period == 0) {
+ struct kperf_timer *timer = &kperf_timerv[i];
+ if (timer->period == 0) {
continue;
}
- /* wait for the timer to stop */
- while (kperf_timerv[i].active);
+ uint32_t action_state = atomic_fetch_and(&timer->action_state,
+ ~KPERF_TMR_ACTION_MASK);
+ if (action_state & KPERF_TMR_ACTIVE) {
+ bit_set(running_timers, i);
+ }
+
+ timer_call_cancel(&timer->tcall);
+ }
- timer_call_cancel(&(kperf_timerv[i].tcall));
+ /*
+ * Wait for any running timers to finish their critical sections.
+ */
+ for (unsigned int i = lsb_first(running_timers); i < kperf_timerc;
+ i = lsb_next(running_timers, i)) {
+ while (atomic_load(&kperf_timerv[i].action_state) != 0) {
+ delay(10);
+ }
}
- /* wait for PET to stop, too */
- kperf_pet_config(0);
+ if (pet_timer_id < kperf_timerc) {
+ /* wait for PET to stop, too */
+ kperf_pet_config(0);
+ }
}
unsigned int
for (unsigned int i = 0; i < kperf_timerc; i++) {
kperf_timerv[i].period = 0;
kperf_timerv[i].actionid = 0;
-#if defined(__x86_64__)
- kperf_timerv[i].pending_cpus = 0;
-#endif /* defined(__x86_64__) */
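+		/* no CPUs should have pending samples for this timer */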
+		atomic_store_explicit(&kperf_timerv[i].pending_cpus, 0,
+		    memory_order_relaxed);
}
}
unsigned int old_count;
if (min_period_abstime == 0) {
- nanoseconds_to_absolutetime(MIN_PERIOD_NS, &min_period_abstime);
- nanoseconds_to_absolutetime(MIN_PERIOD_BG_NS, &min_period_bg_abstime);
- nanoseconds_to_absolutetime(MIN_PERIOD_PET_NS, &min_period_pet_abstime);
- nanoseconds_to_absolutetime(MIN_PERIOD_PET_BG_NS,
- &min_period_pet_bg_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_NS, &min_period_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_BG_NS, &min_period_bg_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_NS, &min_period_pet_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_BG_NS,
+ &min_period_pet_bg_abstime);
assert(min_period_abstime > 0);
}
/* create a new array */
new_timerv = kalloc_tag(count * sizeof(struct kperf_timer),
- VM_KERN_MEMORY_DIAG);
+ VM_KERN_MEMORY_DIAG);
if (new_timerv == NULL) {
return ENOMEM;
}
if (old_timerv != NULL) {
bcopy(kperf_timerv, new_timerv,
- kperf_timerc * sizeof(struct kperf_timer));
+ kperf_timerc * sizeof(struct kperf_timer));
}
/* zero the new entries */
bzero(&(new_timerv[kperf_timerc]),
- (count - old_count) * sizeof(struct kperf_timer));
+ (count - old_count) * sizeof(struct kperf_timer));
/* (re-)setup the timer call info for all entries */
for (unsigned int i = 0; i < count; i++) {
- timer_call_setup(&(new_timerv[i].tcall), kperf_timer_handler, &(new_timerv[i]));
+ timer_call_setup(&new_timerv[i].tcall, kperf_timer_handler, &new_timerv[i]);
}
kperf_timerv = new_timerv;