X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3903760236c30e3b5ace7a4eefac3a269d68957c..c6bf4f310a33a9262d455ea4d3f0630b1255e3fe:/osfmk/kperf/kperf_timer.c

diff --git a/osfmk/kperf/kperf_timer.c b/osfmk/kperf/kperf_timer.c
index cfa429f2f..a6287f39c 100644
--- a/osfmk/kperf/kperf_timer.c
+++ b/osfmk/kperf/kperf_timer.c
@@ -31,6 +31,7 @@
 #include
 #include /* current_thread() */
 #include
+#include
 #include
 #include
 #include
@@ -55,20 +56,13 @@ unsigned int kperf_timerc = 0;
 
 static unsigned int pet_timer_id = 999;
 
+#define KPERF_TMR_ACTION_MASK (0xff)
+#define KPERF_TMR_ACTION(action_state) ((action_state) & KPERF_TMR_ACTION_MASK)
+#define KPERF_TMR_ACTIVE (0x100)
+
 /* maximum number of timers we can construct */
 #define TIMER_MAX (16)
 
-#if defined(__x86_64__)
-
-#define MIN_PERIOD_NS (20 * NSEC_PER_USEC)
-#define MIN_PERIOD_BG_NS (10 * NSEC_PER_MSEC)
-#define MIN_PERIOD_PET_NS (2 * NSEC_PER_MSEC)
-#define MIN_PERIOD_PET_BG_NS (10 * NSEC_PER_MSEC)
-
-#else /* defined(__x86_64__) */
-#error "unsupported architecture"
-#endif /* defined(__x86_64__) */
-
 static uint64_t min_period_abstime;
 static uint64_t min_period_bg_abstime;
 static uint64_t min_period_pet_abstime;
@@ -111,12 +105,10 @@ kperf_timer_schedule(struct kperf_timer *timer, uint64_t now)
 	timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
 }
 
-void
-kperf_ipi_handler(void *param)
+static void
+kperf_sample_cpu(struct kperf_timer *timer, bool system_sample,
+    bool only_system)
 {
-	struct kperf_context ctx;
-	struct kperf_timer *timer = param;
-
 	assert(timer != NULL);
 
 	/* Always cut a tracepoint to show a sample event occurred */
@@ -125,17 +117,23 @@
 
 	int ncpu = cpu_number();
 	struct kperf_sample *intbuf = kperf_intr_sample_buffer();
+#if DEVELOPMENT || DEBUG
+	intbuf->sample_time = mach_absolute_time();
+#endif /* DEVELOPMENT || DEBUG */
 
 	/* On a timer, we can see the "real" current thread */
-	ctx.cur_thread = current_thread();
-	ctx.cur_pid = task_pid(get_threadtask(ctx.cur_thread));
-
-	/* who fired */
-	ctx.trigger_type = TRIGGER_TYPE_TIMER;
-	ctx.trigger_id = (unsigned int)(timer - kperf_timerv);
+	thread_t thread = current_thread();
+	task_t task = get_threadtask(thread);
+	struct kperf_context ctx = {
+		.cur_thread = thread,
+		.cur_task = task,
+		.cur_pid = task_pid(task),
+		.trigger_type = TRIGGER_TYPE_TIMER,
+		.trigger_id = (unsigned int)(timer - kperf_timerv),
+	};
 
 	if (ctx.trigger_id == pet_timer_id && ncpu < machine_info.logical_cpu_max) {
-		kperf_thread_on_cpus[ncpu] = ctx.cur_thread;
+		kperf_tid_on_cpus[ncpu] = thread_tid(ctx.cur_thread);
 	}
 
 	/* make sure sampling is on */
@@ -149,14 +147,21 @@
 	}
 
 	/* call the action -- kernel-only from interrupt, pend user */
-	int r = kperf_sample(intbuf, &ctx, timer->actionid, SAMPLE_FLAG_PEND_USER);
+	int r = kperf_sample(intbuf, &ctx, timer->actionid,
+	    SAMPLE_FLAG_PEND_USER | (system_sample ? SAMPLE_FLAG_SYSTEM : 0) |
+	    (only_system ? SAMPLE_FLAG_ONLY_SYSTEM : 0));
 
 	/* end tracepoint is informational */
 	BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, r);
 
-#if defined(__x86_64__)
-	(void)atomic_bit_clear(&(timer->pending_cpus), ncpu, __ATOMIC_RELAXED);
-#endif /* defined(__x86_64__) */
+	(void)atomic_fetch_and_explicit(&timer->pending_cpus,
+	    ~(UINT64_C(1) << ncpu), memory_order_relaxed);
+}
+
+void
+kperf_ipi_handler(void *param)
+{
+	kperf_sample_cpu((struct kperf_timer *)param, false, false);
 }
 
 static void
@@ -165,8 +170,19 @@ kperf_timer_handler(void *param0, __unused void *param1)
 {
 	struct kperf_timer *timer = param0;
 	unsigned int ntimer = (unsigned int)(timer - kperf_timerv);
 	unsigned int ncpus = machine_info.logical_cpu_max;
+	bool system_only_self = true;
+
+	uint32_t action_state = atomic_fetch_or(&timer->action_state,
+	    KPERF_TMR_ACTIVE);
+
+	uint32_t actionid = KPERF_TMR_ACTION(action_state);
+	if (actionid == 0) {
+		goto deactivate;
+	}
 
-	timer->active = 1;
+#if DEVELOPMENT || DEBUG
+	timer->fire_time = mach_absolute_time();
+#endif /* DEVELOPMENT || DEBUG */
 
 	/* along the lines of do not ipi if we are all shutting down */
 	if (kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN) {
@@ -174,17 +190,26 @@ kperf_timer_handler(void *param0, __unused void *param1)
 	}
 
 	BUF_DATA(PERF_TM_FIRE, ntimer, ntimer == pet_timer_id, timer->period,
-	    timer->actionid);
+	    actionid);
 
 	if (ntimer == pet_timer_id) {
 		kperf_pet_fire_before();
 
 		/* clean-up the thread-on-CPUs cache */
-		bzero(kperf_thread_on_cpus, ncpus * sizeof(*kperf_thread_on_cpus));
+		bzero(kperf_tid_on_cpus, ncpus * sizeof(*kperf_tid_on_cpus));
 	}
 
-	/* ping all CPUs */
-	kperf_mp_broadcast_running(timer);
+	/*
+	 * IPI other cores only if the action has non-system samplers.
+	 */
+	if (kperf_action_has_non_system(actionid)) {
+		/*
+		 * If the core that's handling the timer is not scheduling
+		 * threads, only run system samplers.
+		 */
+		system_only_self = kperf_mp_broadcast_other_running(timer);
+	}
+	kperf_sample_cpu(timer, true, system_only_self);
 
 	/* release the pet thread? */
 	if (ntimer == pet_timer_id) {
@@ -192,16 +217,16 @@ kperf_timer_handler(void *param0, __unused void *param1)
 		kperf_pet_fire_after();
 	} else {
 		/*
-		 * FIXME: Get the current time from elsewhere. The next
-		 * timer's period now includes the time taken to reach this
-		 * point. This causes a bias towards longer sampling periods
-		 * than requested.
-		 */
+		 * FIXME: Get the current time from elsewhere. The next
+		 * timer's period now includes the time taken to reach this
+		 * point. This causes a bias towards longer sampling periods
+		 * than requested.
+		 */
 		kperf_timer_schedule(timer, mach_absolute_time());
 	}
 
 deactivate:
-	timer->active = 0;
+	atomic_fetch_and(&timer->action_state, ~KPERF_TMR_ACTIVE);
 }
 
 /* program the timer from the PET thread */
@@ -255,7 +280,7 @@ kperf_timer_pet_rearm(uint64_t elapsed_ticks)
 	BUF_INFO(PERF_PET_SCHED, timer->period, period, elapsed_ticks, deadline);
 
 	/* re-schedule the timer, making sure we don't apply slop */
-	timer_call_enter(&(timer->tcall), deadline, TIMER_CALL_SYS_CRITICAL);
+	timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
 
 	return;
 }
@@ -272,30 +297,54 @@ kperf_timer_go(void)
 	uint64_t now = mach_absolute_time();
 
 	for (unsigned int i = 0; i < kperf_timerc; i++) {
-		if (kperf_timerv[i].period == 0) {
+		struct kperf_timer *timer = &kperf_timerv[i];
+		if (timer->period == 0) {
 			continue;
 		}
 
-		kperf_timer_schedule(&(kperf_timerv[i]), now);
+		atomic_store(&timer->action_state,
+		    timer->actionid & KPERF_TMR_ACTION_MASK);
+		kperf_timer_schedule(timer, now);
 	}
 }
 
 void
 kperf_timer_stop(void)
 {
+	/*
+	 * Determine which timers are running and store them in a bitset, while
+	 * cancelling their timer call.
+	 */
+	uint64_t running_timers = 0;
 	for (unsigned int i = 0; i < kperf_timerc; i++) {
-		if (kperf_timerv[i].period == 0) {
+		struct kperf_timer *timer = &kperf_timerv[i];
+		if (timer->period == 0) {
 			continue;
 		}
 
-		/* wait for the timer to stop */
-		while (kperf_timerv[i].active);
+		uint32_t action_state = atomic_fetch_and(&timer->action_state,
+		    ~KPERF_TMR_ACTION_MASK);
+		if (action_state & KPERF_TMR_ACTIVE) {
+			bit_set(running_timers, i);
+		}
+
+		timer_call_cancel(&timer->tcall);
+	}
 
-		timer_call_cancel(&(kperf_timerv[i].tcall));
+	/*
+	 * Wait for any running timers to finish their critical sections.
+	 */
+	for (unsigned int i = lsb_first(running_timers); i < kperf_timerc;
+	    i = lsb_next(running_timers, i)) {
+		while (atomic_load(&kperf_timerv[i].action_state) != 0) {
+			delay(10);
+		}
 	}
 
-	/* wait for PET to stop, too */
-	kperf_pet_config(0);
+	if (pet_timer_id < kperf_timerc) {
+		/* wait for PET to stop, too */
+		kperf_pet_config(0);
+	}
 }
 
 unsigned int
@@ -399,9 +448,7 @@ kperf_timer_reset(void)
 	for (unsigned int i = 0; i < kperf_timerc; i++) {
 		kperf_timerv[i].period = 0;
 		kperf_timerv[i].actionid = 0;
-#if defined(__x86_64__)
-		kperf_timerv[i].pending_cpus = 0;
-#endif /* defined(__x86_64__) */
+		atomic_store_explicit(&kperf_timerv[i].pending_cpus, 0, memory_order_relaxed);
 	}
 }
 
@@ -412,11 +459,11 @@ kperf_timer_set_count(unsigned int count)
 	unsigned int old_count;
 
 	if (min_period_abstime == 0) {
-		nanoseconds_to_absolutetime(MIN_PERIOD_NS, &min_period_abstime);
-		nanoseconds_to_absolutetime(MIN_PERIOD_BG_NS, &min_period_bg_abstime);
-		nanoseconds_to_absolutetime(MIN_PERIOD_PET_NS, &min_period_pet_abstime);
-		nanoseconds_to_absolutetime(MIN_PERIOD_PET_BG_NS,
-		    &min_period_pet_bg_abstime);
+		nanoseconds_to_absolutetime(KP_MIN_PERIOD_NS, &min_period_abstime);
+		nanoseconds_to_absolutetime(KP_MIN_PERIOD_BG_NS, &min_period_bg_abstime);
+		nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_NS, &min_period_pet_abstime);
+		nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_BG_NS,
+		    &min_period_pet_bg_abstime);
 		assert(min_period_abstime > 0);
 	}
 
@@ -453,7 +500,7 @@ kperf_timer_set_count(unsigned int count)
 
 	/* create a new array */
 	new_timerv = kalloc_tag(count * sizeof(struct kperf_timer),
-	    VM_KERN_MEMORY_DIAG);
+	    VM_KERN_MEMORY_DIAG);
 	if (new_timerv == NULL) {
 		return ENOMEM;
 	}
@@ -462,16 +509,16 @@ kperf_timer_set_count(unsigned int count)
 
 	if (old_timerv != NULL) {
 		bcopy(kperf_timerv, new_timerv,
-		    kperf_timerc * sizeof(struct kperf_timer));
+		    kperf_timerc * sizeof(struct kperf_timer));
 	}
 
 	/* zero the new entries */
 	bzero(&(new_timerv[kperf_timerc]),
-	    (count - old_count) * sizeof(struct kperf_timer));
+	    (count - old_count) * sizeof(struct kperf_timer));
 
 	/* (re-)setup the timer call info for all entries */
 	for (unsigned int i = 0; i < count; i++) {
-		timer_call_setup(&(new_timerv[i].tcall), kperf_timer_handler, &(new_timerv[i]));
+		timer_call_setup(&new_timerv[i].tcall, kperf_timer_handler, &new_timerv[i]);
 	}
 
 	kperf_timerv = new_timerv;
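
The core of this change is the new action_state word: KPERF_TMR_ACTION_MASK keeps the action id in the low byte, and KPERF_TMR_ACTIVE (bit 8) marks a handler in flight, replacing the old bare timer->active flag and the unsynchronized "while (kperf_timerv[i].active);" spin. Below is a minimal standalone sketch of the same protocol using C11 atomics; the names and printf scaffolding are illustrative stand-ins, not xnu code.

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define TMR_ACTION_MASK (0xff)  /* stands in for KPERF_TMR_ACTION_MASK */
	#define TMR_ACTIVE      (0x100) /* stands in for KPERF_TMR_ACTIVE */

	static _Atomic uint32_t action_state;

	/* timer-handler side: publish "active", bail out if already disarmed */
	static void
	handler(void)
	{
		uint32_t state = atomic_fetch_or(&action_state, TMR_ACTIVE);
		uint32_t actionid = state & TMR_ACTION_MASK;
		if (actionid == 0) {
			goto deactivate;
		}
		printf("sampling with action %u\n", actionid);
	deactivate:
		/* drop the active bit; the action id (if any) stays intact */
		atomic_fetch_and(&action_state, (uint32_t)~TMR_ACTIVE);
	}

	/* stop side: disarm first, then wait for any in-flight handler */
	static void
	stop(void)
	{
		atomic_fetch_and(&action_state, (uint32_t)~TMR_ACTION_MASK);
		while (atomic_load(&action_state) != 0) {
			/* spin; the kernel sleeps with delay(10) here */
		}
	}

	int
	main(void)
	{
		atomic_store(&action_state, 2); /* arm with action id 2 */
		handler();
		stop();
		printf("stopped\n");
		return 0;
	}

Because the stop path clears the action id before waiting, a handler that fires during teardown reads an action id of 0 and takes the goto deactivate path without sampling, which is exactly what makes the busy-wait in kperf_timer_stop() safe to bound.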
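The diff also drops the x86-only atomic_bit_clear() in favor of a portable atomic_fetch_and_explicit() on the pending_cpus bitmask, with relaxed ordering since each CPU only touches its own bit. A sketch of that bookkeeping pattern, assuming a 64-bit mask with one bit per CPU (the helper names here are hypothetical, not xnu's):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* stands in for the pending_cpus field of struct kperf_timer */
	static _Atomic uint64_t pending_cpus;

	/* broadcaster: mark a CPU as owing a sample before sending the IPI */
	static void
	mark_pending(int ncpu)
	{
		atomic_fetch_or_explicit(&pending_cpus, UINT64_C(1) << ncpu,
		    memory_order_relaxed);
	}

	/* per-CPU handler epilogue: clear our bit, as kperf_sample_cpu() does */
	static void
	sample_done(int ncpu)
	{
		(void)atomic_fetch_and_explicit(&pending_cpus,
		    ~(UINT64_C(1) << ncpu), memory_order_relaxed);
	}

	/* anyone draining the broadcast just polls the mask for zero */
	static bool
	broadcast_drained(void)
	{
		return atomic_load_explicit(&pending_cpus,
		    memory_order_relaxed) == 0;
	}

	int
	main(void)
	{
		mark_pending(3);
		sample_done(3);
		return broadcast_drained() ? 0 : 1;
	}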
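Finally, kperf_timer_stop() collects the still-active timers into a bitset and walks it with lsb_first()/lsb_next(), helpers that live in xnu's kern/bits.h. An equivalent portable traversal, assuming Clang/GCC's __builtin_ctzll and the usual clear-lowest-set-bit idiom:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint64_t running_timers = 0x15; /* timers 0, 2, and 4 set */

		/* visit each set bit from least to most significant */
		for (uint64_t set = running_timers; set != 0; set &= set - 1) {
			int i = __builtin_ctzll(set); /* index of lowest set bit */
			printf("waiting for timer %d\n", i);
		}
		return 0;
	}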