#include <mach/mach_types.h>
#include <kern/cpu_data.h> /* current_thread() */
#include <kern/kalloc.h>
+#include <stdatomic.h>
#include <sys/errno.h>
#include <sys/vm.h>
#include <sys/ktrace.h>
static unsigned int pet_timer_id = 999;
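+
+/*
+ * A timer's action_state packs the action ID to run into its low byte and an
+ * "active" flag into bit 8, so the timer handler and kperf_timer_stop() can
+ * coordinate through a single atomic word.
+ */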
+#define KPERF_TMR_ACTION_MASK (0xff)
+#define KPERF_TMR_ACTION(action_state) ((action_state) & KPERF_TMR_ACTION_MASK)
+#define KPERF_TMR_ACTIVE (0x100)
+
/* maximum number of timers we can construct */
#define TIMER_MAX (16)
-#if defined(__x86_64__)
-
-#define MIN_PERIOD_NS (20 * NSEC_PER_USEC)
-#define MIN_PERIOD_BG_NS (10 * NSEC_PER_MSEC)
-#define MIN_PERIOD_PET_NS (2 * NSEC_PER_MSEC)
-#define MIN_PERIOD_PET_BG_NS (10 * NSEC_PER_MSEC)
-
-#else /* defined(__x86_64__) */
-#error "unsupported architecture"
-#endif /* defined(__x86_64__) */
-
static uint64_t min_period_abstime;
static uint64_t min_period_bg_abstime;
static uint64_t min_period_pet_abstime;
timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
}
-void
-kperf_ipi_handler(void *param)
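+/*
+ * Sample the current CPU on behalf of `timer`.  `system_sample` adds
+ * SAMPLE_FLAG_SYSTEM to the sample flags and `only_system` adds
+ * SAMPLE_FLAG_ONLY_SYSTEM, restricting the sample to system-wide samplers.
+ */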
+static void
+kperf_sample_cpu(struct kperf_timer *timer, bool system_sample,
+ bool only_system)
{
- struct kperf_context ctx;
- struct kperf_timer *timer = param;
-
assert(timer != NULL);
/* Always cut a tracepoint to show a sample event occurred */
int ncpu = cpu_number();
struct kperf_sample *intbuf = kperf_intr_sample_buffer();
+#if DEVELOPMENT || DEBUG
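+	/* note when this sample was taken, for debugging */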
+ intbuf->sample_time = mach_absolute_time();
+#endif /* DEVELOPMENT || DEBUG */
/* On a timer, we can see the "real" current thread */
- ctx.cur_thread = current_thread();
- ctx.cur_pid = task_pid(get_threadtask(ctx.cur_thread));
-
- /* who fired */
- ctx.trigger_type = TRIGGER_TYPE_TIMER;
- ctx.trigger_id = (unsigned int)(timer - kperf_timerv);
+ thread_t thread = current_thread();
+ task_t task = get_threadtask(thread);
+ struct kperf_context ctx = {
+ .cur_thread = thread,
+ .cur_task = task,
+ .cur_pid = task_pid(task),
+ .trigger_type = TRIGGER_TYPE_TIMER,
+ .trigger_id = (unsigned int)(timer - kperf_timerv),
+ };
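+	/* for PET, remember which thread ID was on this CPU for this sample */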
if (ctx.trigger_id == pet_timer_id && ncpu < machine_info.logical_cpu_max) {
- kperf_thread_on_cpus[ncpu] = ctx.cur_thread;
+ kperf_tid_on_cpus[ncpu] = thread_tid(ctx.cur_thread);
}
/* make sure sampling is on */
}
/* call the action -- kernel-only from interrupt, pend user */
- int r = kperf_sample(intbuf, &ctx, timer->actionid, SAMPLE_FLAG_PEND_USER);
+ int r = kperf_sample(intbuf, &ctx, timer->actionid,
+ SAMPLE_FLAG_PEND_USER | (system_sample ? SAMPLE_FLAG_SYSTEM : 0) |
+ (only_system ? SAMPLE_FLAG_ONLY_SYSTEM : 0));
/* end tracepoint is informational */
BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, r);
-#if defined(__x86_64__)
- (void)atomic_bit_clear(&(timer->pending_cpus), ncpu, __ATOMIC_RELAXED);
-#endif /* defined(__x86_64__) */
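+	/* this CPU has handled its pending sample for this timer */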
+ (void)atomic_fetch_and_explicit(&timer->pending_cpus,
+ ~(UINT64_C(1) << ncpu), memory_order_relaxed);
+}
+
+void
+kperf_ipi_handler(void *param)
+{
+ kperf_sample_cpu((struct kperf_timer *)param, false, false);
}
static void
struct kperf_timer *timer = param0;
unsigned int ntimer = (unsigned int)(timer - kperf_timerv);
unsigned int ncpus = machine_info.logical_cpu_max;
+ bool system_only_self = true;
+
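+	/*
+	 * Mark the timer active while reading which action it should run, so
+	 * kperf_timer_stop() can tell this handler is still in its critical
+	 * section.
+	 */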
+ uint32_t action_state = atomic_fetch_or(&timer->action_state,
+ KPERF_TMR_ACTIVE);
+
+ uint32_t actionid = KPERF_TMR_ACTION(action_state);
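+	/* no action is configured for this timer, so there's nothing to sample */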
+ if (actionid == 0) {
+ goto deactivate;
+ }
- timer->active = 1;
+#if DEVELOPMENT || DEBUG
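+	/* note when this timer fired, for debugging */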
+ timer->fire_time = mach_absolute_time();
+#endif /* DEVELOPMENT || DEBUG */
/* along the lines of do not ipi if we are all shutting down */
if (kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN) {
}
BUF_DATA(PERF_TM_FIRE, ntimer, ntimer == pet_timer_id, timer->period,
- timer->actionid);
+ actionid);
if (ntimer == pet_timer_id) {
kperf_pet_fire_before();
/* clean-up the thread-on-CPUs cache */
- bzero(kperf_thread_on_cpus, ncpus * sizeof(*kperf_thread_on_cpus));
+ bzero(kperf_tid_on_cpus, ncpus * sizeof(*kperf_tid_on_cpus));
}
- /* ping all CPUs */
- kperf_mp_broadcast_running(timer);
+ /*
+ * IPI other cores only if the action has non-system samplers.
+ */
+ if (kperf_action_has_non_system(actionid)) {
+ /*
+ * If the core that's handling the timer is not scheduling
+ * threads, only run system samplers.
+ */
+ system_only_self = kperf_mp_broadcast_other_running(timer);
+ }
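+	/* take this CPU's sample in-line, including the system-wide samplers */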
+ kperf_sample_cpu(timer, true, system_only_self);
/* release the pet thread? */
if (ntimer == pet_timer_id) {
kperf_pet_fire_after();
} else {
/*
- * FIXME: Get the current time from elsewhere. The next
- * timer's period now includes the time taken to reach this
- * point. This causes a bias towards longer sampling periods
- * than requested.
- */
+ * FIXME: Get the current time from elsewhere. The next
+ * timer's period now includes the time taken to reach this
+ * point. This causes a bias towards longer sampling periods
+ * than requested.
+ */
kperf_timer_schedule(timer, mach_absolute_time());
}
deactivate:
- timer->active = 0;
+ atomic_fetch_and(&timer->action_state, ~KPERF_TMR_ACTIVE);
}
/* program the timer from the PET thread */
BUF_INFO(PERF_PET_SCHED, timer->period, period, elapsed_ticks, deadline);
/* re-schedule the timer, making sure we don't apply slop */
- timer_call_enter(&(timer->tcall), deadline, TIMER_CALL_SYS_CRITICAL);
+ timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
return;
}
uint64_t now = mach_absolute_time();
for (unsigned int i = 0; i < kperf_timerc; i++) {
- if (kperf_timerv[i].period == 0) {
+ struct kperf_timer *timer = &kperf_timerv[i];
+ if (timer->period == 0) {
continue;
}
- kperf_timer_schedule(&(kperf_timerv[i]), now);
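+		/* publish the timer's action so its handler will run it when it fires */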
+ atomic_store(&timer->action_state,
+ timer->actionid & KPERF_TMR_ACTION_MASK);
+ kperf_timer_schedule(timer, now);
}
}
void
kperf_timer_stop(void)
{
+ /*
+ * Determine which timers are running and store them in a bitset, while
+ * cancelling their timer call.
+ */
+ uint64_t running_timers = 0;
for (unsigned int i = 0; i < kperf_timerc; i++) {
- if (kperf_timerv[i].period == 0) {
+ struct kperf_timer *timer = &kperf_timerv[i];
+ if (timer->period == 0) {
continue;
}
- /* wait for the timer to stop */
- while (kperf_timerv[i].active);
+ uint32_t action_state = atomic_fetch_and(&timer->action_state,
+ ~KPERF_TMR_ACTION_MASK);
+ if (action_state & KPERF_TMR_ACTIVE) {
+ bit_set(running_timers, i);
+ }
+
+ timer_call_cancel(&timer->tcall);
+ }
- timer_call_cancel(&(kperf_timerv[i].tcall));
+ /*
+ * Wait for any running timers to finish their critical sections.
+ */
+ for (unsigned int i = lsb_first(running_timers); i < kperf_timerc;
+ i = lsb_next(running_timers, i)) {
+ while (atomic_load(&kperf_timerv[i].action_state) != 0) {
+ delay(10);
+ }
}
- /* wait for PET to stop, too */
- kperf_pet_config(0);
+ if (pet_timer_id < kperf_timerc) {
+ /* wait for PET to stop, too */
+ kperf_pet_config(0);
+ }
}
unsigned int
for (unsigned int i = 0; i < kperf_timerc; i++) {
kperf_timerv[i].period = 0;
kperf_timerv[i].actionid = 0;
-#if defined(__x86_64__)
- kperf_timerv[i].pending_cpus = 0;
-#endif /* defined(__x86_64__) */
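+		/* no CPUs should have pending samples for this timer */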
+		atomic_store_explicit(&kperf_timerv[i].pending_cpus, 0,
+		    memory_order_relaxed);
}
}
unsigned int old_count;
if (min_period_abstime == 0) {
- nanoseconds_to_absolutetime(MIN_PERIOD_NS, &min_period_abstime);
- nanoseconds_to_absolutetime(MIN_PERIOD_BG_NS, &min_period_bg_abstime);
- nanoseconds_to_absolutetime(MIN_PERIOD_PET_NS, &min_period_pet_abstime);
- nanoseconds_to_absolutetime(MIN_PERIOD_PET_BG_NS,
- &min_period_pet_bg_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_NS, &min_period_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_BG_NS, &min_period_bg_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_NS, &min_period_pet_abstime);
+ nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_BG_NS,
+ &min_period_pet_bg_abstime);
assert(min_period_abstime > 0);
}
/* create a new array */
new_timerv = kalloc_tag(count * sizeof(struct kperf_timer),
- VM_KERN_MEMORY_DIAG);
+ VM_KERN_MEMORY_DIAG);
if (new_timerv == NULL) {
return ENOMEM;
}
if (old_timerv != NULL) {
bcopy(kperf_timerv, new_timerv,
- kperf_timerc * sizeof(struct kperf_timer));
+ kperf_timerc * sizeof(struct kperf_timer));
}
/* zero the new entries */
bzero(&(new_timerv[kperf_timerc]),
- (count - old_count) * sizeof(struct kperf_timer));
+ (count - old_count) * sizeof(struct kperf_timer));
/* (re-)setup the timer call info for all entries */
for (unsigned int i = 0; i < count; i++) {
- timer_call_setup(&(new_timerv[i].tcall), kperf_timer_handler, &(new_timerv[i]));
+ timer_call_setup(&new_timerv[i].tcall, kperf_timer_handler, &new_timerv[i]);
}
kperf_timerv = new_timerv;