+#define FIRES_THRESHOLD (5000)
+
+T_DECL(kperf_timer_fires_enough_times,
+ "ensure the correct number of timers fire in a period of time")
+{
+ start_controlling_ktrace();
+
+ dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);
+
+ ktrace_session_t s = ktrace_session_create();
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
+ ktrace_set_collection_interval(s, 100);
+
+ __block uint64_t nfires = 0;
+ __block uint64_t first_fire_ns = 0;
+ __block uint64_t last_fire_ns = 0;
+
+ int ncpus = dt_ncpu();
+
+ ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
+ nfires++;
+ if (first_fire_ns == 0) {
+ ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
+ &first_fire_ns);
+ }
+ ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
+ &last_fire_ns);
+
+ T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
+ "timer fire should not occur on an IOP");
+ if (nfires >= FIRES_THRESHOLD) {
+ ktrace_end(s, 1);
+ }
+ });
+
+ configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);
+
+ pthread_t thread;
+ T_QUIET;
+ T_ASSERT_POSIX_ZERO(pthread_create(&thread, NULL, &spinning_thread,
+ &thread_spinning), NULL);
+ dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);
+
+ ktrace_set_completion_handler(s, ^{
+ running_threads = false;
+
+ double duration_secs = (double)(last_fire_ns - first_fire_ns) /
+ NSEC_PER_SEC;
+ T_LOG("stopping thread after %.2f seconds", duration_secs);
+
+ T_QUIET; T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+
+ T_LOG("saw %" PRIu64 " timer fires (%g fires/second)", nfires,
+ (double)nfires / (double)duration_secs);
+ double expected_nfires = duration_secs * NSEC_PER_SEC / TIMER_PERIOD_NS;
+ T_LOG("expecting %g timer fires", expected_nfires);
+ double nfires_seen_pct = (double)nfires / expected_nfires * 100;
+ T_ASSERT_GT(nfires_seen_pct, 95.0,
+ "saw reasonable number of missed timer fires");
+ T_ASSERT_LT(nfires_seen_pct, 105.0,
+ "saw reasonable number of extra timer fires");
+
+ T_END;
+ });
+
+ start_tracing_with_timeout(s, TIMEOUT_SECS);
+
+ dispatch_main();
+}
+
+// kperf_timer_not_oversampling ensures that consecutive profiling timer fires
+// are spaced apart by the programmed timer period, within a small tolerance.
+// Otherwise, tools that rely on sample counts as a proxy for CPU usage will
+// over-estimate it.
+
+#define FIRE_PERIOD_THRESHOLD_NS \
+ (TIMER_PERIOD_NS - (uint64_t)(TIMER_PERIOD_NS * 0.05))
+
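+// A fixed-size circular queue. Its slots are stored directly after this
+// header in memory (see struct cirq_instval and struct cirq_u64 below).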
+struct cirq {
+ unsigned int nslots;
+ unsigned int tail_slot;
+ unsigned int slot_size;
+};
+
+#define CIRQ_INIT(TYPE, NSLOTS) \
+ (struct cirq){ \
+ .nslots = NSLOTS, .tail_slot = 0, .slot_size = sizeof(TYPE), \
+ }
+
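+// Return a pointer to slot i in the storage that follows the header.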
+static inline void *
+cirq_get(struct cirq *cq, unsigned int i)
+{
+ return (char *)cq + sizeof(*cq) + (cq->slot_size * i);
+}
+
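+// Return the most recently pushed slot.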
+static void *
+cirq_top(void *vcq)
+{
+ struct cirq *cq = vcq;
+ unsigned int tail_slot = cq->tail_slot;
+ unsigned int top_slot = (tail_slot > 0 ? tail_slot : cq->nslots) - 1;
+ return cirq_get(cq, top_slot);
+}
+
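+// Return the slot at the current tail for writing and advance the tail,
+// overwriting the oldest entry once the queue has wrapped around.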
+static void *
+cirq_push(void *vcq)
+{
+ struct cirq *cq = vcq;
+ unsigned int tail_slot = cq->tail_slot;
+ unsigned int next_slot = tail_slot == cq->nslots - 1 ? 0 : tail_slot + 1;
+ cq->tail_slot = next_slot;
+ return cirq_get(cq, tail_slot);
+}
+
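+// Call iter on every slot, starting with the oldest (the slot the next push
+// would overwrite) and ending with the most recent.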
+static void
+cirq_for(void *vcq, void (^iter)(void *elt))
+{
+ struct cirq *cq = vcq;
+ for (unsigned int i = cq->tail_slot; i < cq->nslots; i++) {
+ iter(cirq_get(cq, i));
+ }
+ for (unsigned int i = 0; i < cq->tail_slot; i++) {
+ iter(cirq_get(cq, i));
+ }
+}
+
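+// Number of recent samples to keep per queue for logging.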
+#define HISTORY_LEN 5
+
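+// A value recorded along with the instant (in nanoseconds) it was observed.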
+struct instval {
+ uint64_t iv_instant_ns;
+ uint64_t iv_val;
+};
+
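+// Concrete circular queues: a cirq header followed by HISTORY_LEN slots.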
+struct cirq_instval {
+ struct cirq cq;
+ struct instval elts[HISTORY_LEN];
+};
+
+struct cirq_u64 {
+ struct cirq cq;
+ uint64_t elts[HISTORY_LEN];
+};
+
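+// Per-CPU history of decrementer latencies and of the latencies observed at
+// each kperf timer fire.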
+struct cpu_oversample {
+ struct cirq_instval timer_latencies;
+ struct cirq_instval fire_latencies;
+};
+
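+// Log a CPU's recent timer and fire latencies; indices run from -HISTORY_LEN
+// (oldest) to -1 (most recent).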
+static void
+cpu_oversample_log(struct cpu_oversample *cpu, unsigned int cpuid)
+{
+ T_LOG("CPU %d timer latencies:", cpuid);
+ __block int i = -HISTORY_LEN;
+ cirq_for(&cpu->timer_latencies, ^(void *viv) {
+ struct instval *iv = viv;
+ T_LOG("\t%llu timer latency %d: %llu", iv->iv_instant_ns, i,
+ iv->iv_val);
+ i++;
+ });
+
+ T_LOG("CPU %d fire latencies:", cpuid);
+ i = -HISTORY_LEN;
+ cirq_for(&cpu->fire_latencies, ^(void *viv) {
+ struct instval *iv = viv;
+ T_LOG("\t%llu fire latency %d: %llu", iv->iv_instant_ns, i, iv->iv_val);
+ i++;
+ });
+}
+
+T_DECL(kperf_timer_not_oversampling,
+ "ensure that time between fires is long enough")
+{
+ start_controlling_ktrace();
+
+ ktrace_session_t s = ktrace_session_create();
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
+ // Try not to perturb the system with more work.
+ ktrace_set_collection_interval(s, 1000);
+ __block uint64_t nfires = 0;
+ __block uint64_t first_fire_ns = 0;
+ __block uint64_t last_fire_ns = 0;
+ __block unsigned int last_fire_cpuid = 0;
+
+ int ncpus = dt_ncpu();
+ T_QUIET; T_ASSERT_GT(ncpus, 0, "should see positive number of CPUs");
+
+ struct cpu_oversample *per_cpu = calloc((unsigned int)ncpus,
+ sizeof(per_cpu[0]));
+ T_QUIET; T_WITH_ERRNO;
+ T_ASSERT_NOTNULL(per_cpu, "allocated timer latency tracking");
+ for (int i = 0; i < ncpus; i++) {
+ per_cpu[i].timer_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
+ per_cpu[i].fire_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
+ }
+
+ __block bool in_stackshot = false;
+ __block uint64_t last_stackshot_ns = 0;
+
+ // Stackshots are the primary source of interrupt latency on the system.
+ ktrace_events_single(s, KDBG_EVENTID(DBG_BSD, DBG_BSD_EXCP_SC,
+ SYS_stack_snapshot_with_config), ^(struct trace_point *tp) {
+ bool start = tp->debugid & DBG_FUNC_START;
+ uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
+ T_LOG("%llu: %s stackshot syscall from process %s",
+ cur_ns, start ? "start" : "finish", tp->command);
+ in_stackshot = start;
+ if (!start) {
+ last_stackshot_ns = cur_ns;
+ }
+ });
+
+ struct cirq_u64 *fire_times = calloc(1, sizeof(*fire_times));
+ T_QUIET; T_WITH_ERRNO;
+ T_ASSERT_NOTNULL(fire_times, "allocated fire time tracking");
+ fire_times->cq = CIRQ_INIT(uint64_t, HISTORY_LEN);
+
+ // Track the decrementer's latency values to find any unexpectedly long
+ // interrupt latencies that could affect the firing cadence.
+ ktrace_events_single(s, MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0),
+ ^(struct trace_point *tp) {
+ uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
+ uint64_t latency_ns = ns_from_abs(s, 0 - tp->arg1);
+ struct instval *latency = cirq_push(&per_cpu[tp->cpuid].timer_latencies);
+ latency->iv_instant_ns = cur_ns;
+ latency->iv_val = latency_ns;
+ });
+
+ ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
+ T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
+ "timer fire should not occur on an IOP");
+
+ nfires++;
+ uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
+ uint64_t *fire_ns = cirq_push(fire_times);
+ *fire_ns = cur_ns;
+
+ struct cpu_oversample *cur_cpu = &per_cpu[tp->cpuid];
+ struct instval *last_timer_latency = cirq_top(
+ &cur_cpu->timer_latencies);
+ uint64_t timer_latency_ns = last_timer_latency->iv_val;
+
+ if (first_fire_ns == 0) {
+ first_fire_ns = cur_ns;
+ } else {
+ struct cpu_oversample *last_cpu = &per_cpu[last_fire_cpuid];
+ struct instval *last_latency = cirq_top(&last_cpu->fire_latencies);
+ uint64_t last_fire_latency_ns = last_latency->iv_val;
+
+ if (timer_latency_ns > TIMER_PERIOD_NS / 4) {
+ T_LOG("%llu: long timer latency at fire: %llu", cur_ns,
+ timer_latency_ns);
+ }
+
+ // Long interrupt latencies will cause the timer to miss its fire
+ // time and report a fire past when it should have, making the next
+ // period too short. Keep track of the latency as a leeway
+ // adjustment for the subsequent fire.
+ uint64_t fire_period_ns = cur_ns - last_fire_ns;
+ uint64_t fire_period_adj_ns = fire_period_ns +
+ last_fire_latency_ns + timer_latency_ns;
+ bool too_short = fire_period_adj_ns < FIRE_PERIOD_THRESHOLD_NS;
+ if (too_short) {
+ T_LOG("%llu: period of timer fire %llu is %llu + %llu + %llu = "
+ "%llu < %llu",
+ cur_ns, nfires, fire_period_ns, last_fire_latency_ns,
+ timer_latency_ns, fire_period_adj_ns,
+ FIRE_PERIOD_THRESHOLD_NS);
+
+ T_LOG("short profile timer fired on CPU %d", tp->cpuid);
+ cpu_oversample_log(cur_cpu, tp->cpuid);
+
+ if (cur_cpu == last_cpu) {
+ T_LOG("fired back-to-back on CPU %d", tp->cpuid);
+ } else {
+ T_LOG("previous profile timer fired on CPU %d",
+ last_fire_cpuid);
+ cpu_oversample_log(last_cpu, last_fire_cpuid);
+ }
+
+ T_LOG("profile timer fires:");
+ cirq_for(fire_times, ^(void *vu64) {
+ T_LOG("\tfire: %llu", *(uint64_t *)vu64);
+ });
+ if (nfires < (unsigned int)ncpus) {
+ T_LOG("ignoring timer fire %llu as context may be missing",
+ nfires);
+ } else {
+ if (in_stackshot) {
+ T_LOG("skipping assertion because stackshot is "
+ "happening");
+ } else if (last_stackshot_ns != 0 &&
+ cur_ns > last_stackshot_ns &&
+ cur_ns - last_stackshot_ns < TIMER_PERIOD_NS) {
+ T_LOG("skipping assertion because stackshot happened "
+ "%" PRIu64 "ns ago",
+ cur_ns - last_stackshot_ns);
+ } else {
+ T_ASSERT_FAIL("profiling period is shorter than "
+ "expected with no stackshot interference");
+ }
+ }
+ }
+
+ struct instval *latency = cirq_push(&cur_cpu->fire_latencies);
+ latency->iv_instant_ns = cur_ns;
+ latency->iv_val = timer_latency_ns;
+
+ // Snapshot this timer fire's interrupt latency, so the next one
+ // can make an adjustment to the period.
+ last_fire_latency_ns = timer_latency_ns;
+ }
+ last_fire_ns = cur_ns;
+ last_fire_cpuid = tp->cpuid;
+
+ if (nfires >= FIRES_THRESHOLD) {
+ ktrace_end(s, 1);
+ }
+ });
+
+ configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_TINFO);
+
+ ktrace_set_completion_handler(s, ^{
+ double duration_secs = (double)(last_fire_ns - first_fire_ns) /
+ NSEC_PER_SEC;
+ T_LOG("stopping trace after %.2f seconds", duration_secs);
+
+ T_PASS("saw %" PRIu64 " timer fires (%g fires/second) without "
+ "oversampling", nfires, (double)nfires / (double)duration_secs);
+
+ T_END;
+ });
+
+ start_tracing_with_timeout(s, 5);
+
+ // Get all CPUs out of idle.
+ uint64_t *counts = kpc_counterbuf_alloc();
+ (void)kpc_get_cpu_counters(true, KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
+ free(counts);
+
+ dispatch_main();
+}
+
+T_DECL(kperf_timer_stress, "repeatedly enable and disable timers")
+{
+ start_controlling_ktrace();
+
+ const int niters = 500;
+ for (int i = 0; i < niters; i++) {
+ configure_kperf_stacks_timer(-1, 1, true);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
+ usleep(2000);
+ kperf_reset();
+ }
+ T_LOG("configured kperf with a timer %d times", niters);
+}
+
+#pragma mark - kdebug triggers