[apple/xnu.git] / osfmk / arm64 / monotonic_arm64.c (xnu-7195.81.3)
1 /*
2 * Copyright (c) 2017-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm/cpu_data_internal.h>
30 #include <arm/machine_routines.h>
31 #include <arm64/monotonic.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h> /* panic */
34 #include <kern/kpc.h>
35 #include <kern/monotonic.h>
36 #include <machine/atomic.h>
37 #include <machine/limits.h> /* CHAR_BIT */
38 #include <os/overflow.h>
39 #include <pexpert/arm64/board_config.h>
40 #include <pexpert/device_tree.h> /* SecureDTFindEntry */
41 #include <pexpert/pexpert.h>
42 #include <stdatomic.h>
43 #include <stdint.h>
44 #include <string.h>
45 #include <sys/errno.h>
46 #include <sys/monotonic.h>
47
48 /*
49 * Ensure that control registers read back what was written under MACH_ASSERT
50 * kernels.
51 *
52 * A static inline function cannot be used, because the register name must be
53 * passed to the builtin as a constant string -- system registers are encoded
54 * as an immediate in the MSR/MRS instruction.
55 */
56 #if MACH_ASSERT
57 #define CTRL_REG_SET(reg, val) do { \
58 __builtin_arm_wsr64((reg), (val)); \
59 uint64_t __check_reg = __builtin_arm_rsr64((reg)); \
60 if (__check_reg != (val)) { \
61 panic("value written to %s was not read back (wrote %llx, read %llx)", \
62 #reg, (val), __check_reg); \
63 } \
64 } while (0)
65 #else /* MACH_ASSERT */
66 #define CTRL_REG_SET(reg, val) __builtin_arm_wsr64((reg), (val))
67 #endif /* MACH_ASSERT */
68
69 #pragma mark core counters
70
71 bool mt_core_supported = true;
72
73 static const ml_topology_info_t *topology_info;
74
75 /*
76 * PMC[0-1] are the 48-bit fixed counters -- PMC0 is cycles and PMC1 is
77 * instructions (see arm64/monotonic.h).
78 *
79 * PMC2+ are currently handled by kpc.
80 */
81
82 #define PMC0 "s3_2_c15_c0_0"
83 #define PMC1 "s3_2_c15_c1_0"
84 #define PMC2 "s3_2_c15_c2_0"
85 #define PMC3 "s3_2_c15_c3_0"
86 #define PMC4 "s3_2_c15_c4_0"
87 #define PMC5 "s3_2_c15_c5_0"
88 #define PMC6 "s3_2_c15_c6_0"
89 #define PMC7 "s3_2_c15_c7_0"
90
91 #define PMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
92 X(6, A); X(7, A)
93
94 #if CORE_NCTRS > 8
95 #define PMC8 "s3_2_c15_c9_0"
96 #define PMC9 "s3_2_c15_c10_0"
97 #define PMC_8_9(X, A) X(8, A); X(9, A)
98 #else // CORE_NCTRS > 8
99 #define PMC_8_9(X, A)
100 #endif // CORE_NCTRS > 8
101
102 #define PMC_ALL(X, A) PMC_0_7(X, A); PMC_8_9(X, A)
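/*
 * PMC_ALL is an X-macro: it applies `X` to each counter number, passing `A`
 * along.  As a sketch of how it is used in mt_core_snap below, the invocation
 * PMC_ALL(PMC_RD, 0) expands (on an 8-counter configuration) to:
 *
 *     case (0): return __builtin_arm_rsr64(PMC0); case (1): return
 *     __builtin_arm_rsr64(PMC1); ... case (7): return __builtin_arm_rsr64(PMC7);
 *
 * which maps a run-time counter index onto the compile-time register names
 * required by the MSR/MRS builtins.
 */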
103
104 #define CTR_MAX ((UINT64_C(1) << 47) - 1)
105
106 #define CYCLES 0
107 #define INSTRS 1
108
109 /*
110 * PMC0's offset into a core's PIO range.
111 *
112 * This allows cores to remotely query another core's counters.
113 */
114
115 #define PIO_PMC0_OFFSET (0x200)
116
117 /*
118 * The offset of the counter in the configuration registers. Post-Hurricane
119 * devices have additional counters that need a larger shift than the original
120 * counters.
121 *
122 * XXX For now, just support the lower-numbered counters.
123 */
124 #define CTR_POS(CTR) (CTR)
125
126 /*
127 * PMCR0 is the main control register for the performance monitor. It
128 * controls whether the counters are enabled, how they deliver interrupts, and
129 * other features.
130 */
131
132 #define PMCR0_CTR_EN(CTR) (UINT64_C(1) << CTR_POS(CTR))
133 #define PMCR0_FIXED_EN (PMCR0_CTR_EN(CYCLES) | PMCR0_CTR_EN(INSTRS))
134 /* how interrupts are delivered on a PMI */
135 enum {
136 PMCR0_INTGEN_OFF = 0,
137 PMCR0_INTGEN_PMI = 1,
138 PMCR0_INTGEN_AIC = 2,
139 PMCR0_INTGEN_HALT = 3,
140 PMCR0_INTGEN_FIQ = 4,
141 };
142 #define PMCR0_INTGEN_SET(X) ((uint64_t)(X) << 8)
143
144 #if CPMU_AIC_PMI
145 #define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_AIC)
146 #else /* CPMU_AIC_PMI */
147 #define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ)
148 #endif /* !CPMU_AIC_PMI */
149
150 #define PMCR0_PMI_SHIFT (12)
151 #define PMCR0_CTR_GE8_PMI_SHIFT (44)
152 #define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (PMCR0_PMI_SHIFT + CTR_POS(CTR)))
153 /* fixed counters are always counting */
154 #define PMCR0_PMI_INIT (PMCR0_PMI_EN(CYCLES) | PMCR0_PMI_EN(INSTRS))
155 /* disable counting on a PMI */
156 #define PMCR0_DISCNT_EN (UINT64_C(1) << 20)
157 /* block PMIs until ERET retires */
158 #define PMCR0_WFRFE_EN (UINT64_C(1) << 22)
159 /* count global (not just core-local) L2C events */
160 #define PMCR0_L2CGLOBAL_EN (UINT64_C(1) << 23)
161 /* user mode access to configuration registers */
162 #define PMCR0_USEREN_EN (UINT64_C(1) << 30)
163 #define PMCR0_CTR_GE8_EN_SHIFT (32)
164
165 #define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT)
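/*
 * As a worked example (assuming the FIQ interrupt path, i.e. CPMU_AIC_PMI is
 * not set), PMCR0_INIT works out to:
 *
 *     PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ) = 4 << 8  = 0x0400
 *     PMCR0_PMI_EN(CYCLES)               = 1 << 12 = 0x1000
 *     PMCR0_PMI_EN(INSTRS)               = 1 << 13 = 0x2000
 *                                        PMCR0_INIT = 0x3400
 *
 * core_set_enabled() later ORs in PMCR0_FIXED_EN (bits 0 and 1) to actually
 * start the fixed counters.
 */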
166
167 /*
168 * PMCR1 controls which execution modes count events.
169 */
170
171 #define PMCR1 "s3_1_c15_c1_0"
172
173 #define PMCR1_EL0A32_EN(CTR) (UINT64_C(1) << (0 + CTR_POS(CTR)))
174 #define PMCR1_EL0A64_EN(CTR) (UINT64_C(1) << (8 + CTR_POS(CTR)))
175 #define PMCR1_EL1A64_EN(CTR) (UINT64_C(1) << (16 + CTR_POS(CTR)))
176 /* PMCR1_EL3A64 is not supported on systems with no monitor */
177 #if defined(APPLEHURRICANE)
178 #define PMCR1_EL3A64_EN(CTR) UINT64_C(0)
179 #else
180 #define PMCR1_EL3A64_EN(CTR) (UINT64_C(1) << (24 + CTR_POS(CTR)))
181 #endif
182 #define PMCR1_ALL_EN(CTR) (PMCR1_EL0A32_EN(CTR) | PMCR1_EL0A64_EN(CTR) | \
183 PMCR1_EL1A64_EN(CTR) | PMCR1_EL3A64_EN(CTR))
184
185 /* fixed counters always count in all modes */
186 #define PMCR1_INIT (PMCR1_ALL_EN(CYCLES) | PMCR1_ALL_EN(INSTRS))
187
188 static inline void
189 core_init_execution_modes(void)
190 {
191 uint64_t pmcr1;
192
193 pmcr1 = __builtin_arm_rsr64(PMCR1);
194 pmcr1 |= PMCR1_INIT;
195 __builtin_arm_wsr64(PMCR1, pmcr1);
196 }
197
198 /*
199 * PMCR2 controls watchpoint registers.
200 *
201 * PMCR3 controls breakpoints and address matching.
202 *
203 * PMCR4 controls opcode matching.
204 */
205
206 #define PMCR2 "s3_1_c15_c2_0"
207 #define PMCR3 "s3_1_c15_c3_0"
208 #define PMCR4 "s3_1_c15_c4_0"
209
210 #define PMSR "s3_1_c15_c13_0"
211
212 #define PMSR_OVF(CTR) (1ULL << (CTR))
213
214 #define PMESR0 "S3_1_c15_c5_0"
215 #define PMESR1 "S3_1_c15_c6_0"
216
217 static int
218 core_init(__unused mt_device_t dev)
219 {
220 /* the dev node interface to the core counters is still unsupported */
221 return ENOTSUP;
222 }
223
224 struct mt_cpu *
225 mt_cur_cpu(void)
226 {
227 return &getCpuDatap()->cpu_monotonic;
228 }
229
230 uint64_t
231 mt_core_snap(unsigned int ctr)
232 {
233 switch (ctr) {
234 #define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(PMC ## CTR)
235 PMC_ALL(PMC_RD, 0);
236 #undef PMC_RD
237 default:
238 panic("monotonic: invalid core counter read: %u", ctr);
239 __builtin_unreachable();
240 }
241 }
242
243 void
244 mt_core_set_snap(unsigned int ctr, uint64_t count)
245 {
246 switch (ctr) {
247 case 0:
248 __builtin_arm_wsr64(PMC0, count);
249 break;
250 case 1:
251 __builtin_arm_wsr64(PMC1, count);
252 break;
253 default:
254 panic("monotonic: invalid core counter %u write %llu", ctr, count);
255 __builtin_unreachable();
256 }
257 }
258
259 static void
260 core_set_enabled(void)
261 {
262 uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
263 pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN;
264
265 if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
266 uint64_t kpc_ctrs = kpc_get_configurable_pmc_mask(
267 KPC_CLASS_CONFIGURABLE_MASK) << MT_CORE_NFIXED;
268 #if KPC_ARM64_CONFIGURABLE_COUNT > 6
269 uint64_t ctrs_ge8 = kpc_ctrs >> 8;
270 pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_EN_SHIFT;
271 pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_PMI_SHIFT;
272 kpc_ctrs &= (1ULL << 8) - 1;
273 #endif /* KPC_ARM64_CONFIGURABLE_COUNT > 6 */
274 kpc_ctrs |= kpc_ctrs << PMCR0_PMI_SHIFT;
275 pmcr0 |= kpc_ctrs;
276 }
277
278 __builtin_arm_wsr64(PMCR0, pmcr0);
279 #if MACH_ASSERT
280 /*
281 * Only check for the values that were ORed in.
282 */
283 uint64_t pmcr0_check = __builtin_arm_rsr64(PMCR0);
284 if ((pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN)) != (PMCR0_INIT | PMCR0_FIXED_EN)) {
285 panic("monotonic: hardware ignored enable (read %llx, wrote %llx)",
286 pmcr0_check, pmcr0);
287 }
288 #endif /* MACH_ASSERT */
289 }
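/*
 * A sketch of how the kpc mask is folded into PMCR0 above, assuming
 * MT_CORE_NFIXED is 2 (PMC0 and PMC1 are the fixed counters): if kpc has its
 * first two configurable counters running, kpc_get_configurable_pmc_mask()
 * returns 0b11, which becomes 0b1100 after the shift -- enable bits for PMC2
 * and PMC3.  ORing in `kpc_ctrs << PMCR0_PMI_SHIFT` additionally sets bits 14
 * and 15, so those counters also deliver PMIs on overflow.
 */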
290
291 static void
292 core_idle(__unused cpu_data_t *cpu)
293 {
294 assert(cpu != NULL);
295 assert(ml_get_interrupts_enabled() == FALSE);
296
297 #if DEBUG
298 uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
299 if ((pmcr0 & PMCR0_FIXED_EN) == 0) {
300 panic("monotonic: counters disabled before idling, pmcr0 = 0x%llx\n", pmcr0);
301 }
302 uint64_t pmcr1 = __builtin_arm_rsr64(PMCR1);
303 if ((pmcr1 & PMCR1_INIT) == 0) {
304 panic("monotonic: counter modes disabled before idling, pmcr1 = 0x%llx\n", pmcr1);
305 }
306 #endif /* DEBUG */
307
308 /* disable counters before updating */
309 __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
310
311 mt_update_fixed_counts();
312 }
313
314 #pragma mark uncore performance monitor
315
316 #if HAS_UNCORE_CTRS
317
318 static bool mt_uncore_initted = false;
319
320 /*
321 * Uncore Performance Monitor
322 *
323 * Uncore performance monitors provide event-counting for the last-level caches
324 * (LLCs). Each LLC has its own uncore performance monitor, which can only be
325 * accessed by cores that use that LLC. Like the core performance monitoring
326 * unit, uncore counters are configured globally. If there is more than one
327 * LLC on the system, PIO reads must be used to satisfy uncore requests (using
328 * the `_r` remote variants of the access functions). Otherwise, local MSRs
329 * suffice (using the `_l` local variants of the access functions).
330 */
331
332 #if UNCORE_PER_CLUSTER
333 #define MAX_NMONITORS MAX_CPU_CLUSTERS
334 static uintptr_t cpm_impl[MAX_NMONITORS] = {};
335 #else
336 #define MAX_NMONITORS (1)
337 #endif /* UNCORE_PER_CLUSTER */
338
339 #if UNCORE_VERSION >= 2
340 /*
341 * V2 uncore monitors feature a CTI mechanism -- the second bit of UPMSR is
342 * used to track if a CTI has been triggered due to an overflow.
343 */
344 #define UPMSR_OVF_POS 2
345 #else /* UNCORE_VERSION >= 2 */
346 #define UPMSR_OVF_POS 1
347 #endif /* UNCORE_VERSION < 2 */
348 #define UPMSR_OVF(R, CTR) ((R) >> ((CTR) + UPMSR_OVF_POS) & 0x1)
349 #define UPMSR_OVF_MASK (((UINT64_C(1) << UNCORE_NCTRS) - 1) << UPMSR_OVF_POS)
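/*
 * For example, with UPMSR_OVF_POS == 2 (a V2 uncore monitor),
 * UPMSR_OVF(upmsr, 3) tests bit 5 of UPMSR; UPMSR_OVF_MASK covers the
 * per-counter overflow flags starting at that offset, skipping the low
 * status bits.
 */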
350
351 #define UPMPCM "s3_7_c15_c5_4"
352 #define UPMPCM_CORE(ID) (UINT64_C(1) << (ID))
353
354 /*
355 * The uncore_pmi_mask is a bitmask of CPUs that receive uncore PMIs. It's
356 * initialized by uncore_init and controllable by the uncore_pmi_mask boot-arg.
357 */
358 static int32_t uncore_pmi_mask = 0;
359
360 /*
361 * The uncore_active_ctrs is a bitmask of uncore counters that are currently
362 * requested.
363 */
364 static uint16_t uncore_active_ctrs = 0;
365 static_assert(sizeof(uncore_active_ctrs) * CHAR_BIT >= UNCORE_NCTRS,
366 "counter mask should fit the full range of counters");
367
368 /*
369 * mt_uncore_enabled is true when any uncore counters are active.
370 */
371 bool mt_uncore_enabled = false;
372
373 /*
374 * The uncore_events are the event configurations for each uncore counter -- as
375 * a union to make it easy to program the hardware registers.
376 */
377 static struct uncore_config {
378 union {
379 uint8_t uce_ctrs[UNCORE_NCTRS];
380 uint64_t uce_regs[UNCORE_NCTRS / 8];
381 } uc_events;
382 union {
383 uint16_t uccm_masks[UNCORE_NCTRS];
384 uint64_t uccm_regs[UNCORE_NCTRS / 4];
385 } uc_cpu_masks[MAX_NMONITORS];
386 } uncore_config;
387
388 static struct uncore_monitor {
389 /*
390 * The last snapshot of each of the hardware counter values.
391 */
392 uint64_t um_snaps[UNCORE_NCTRS];
393
394 /*
395 * The accumulated counts for each counter.
396 */
397 uint64_t um_counts[UNCORE_NCTRS];
398
399 /*
400 * Protects accessing the hardware registers and fields in this structure.
401 */
402 lck_spin_t um_lock;
403
404 /*
405 * Whether this monitor needs its registers restored after wake.
406 */
407 bool um_sleeping;
408 } uncore_monitors[MAX_NMONITORS];
409
410 /*
411 * Each uncore unit has its own monitor, corresponding to the memory hierarchy
412 * of the LLCs.
413 */
414 static unsigned int
415 uncore_nmonitors(void)
416 {
417 #if UNCORE_PER_CLUSTER
418 return topology_info->num_clusters;
419 #else /* UNCORE_PER_CLUSTER */
420 return 1;
421 #endif /* !UNCORE_PER_CLUSTER */
422 }
423
424 static unsigned int
425 uncmon_get_curid(void)
426 {
427 #if UNCORE_PER_CLUSTER
428 // The current CPU's cluster ID is used directly as the monitor index.
429 return cpu_cluster_id();
430 #else /* UNCORE_PER_CLUSTER */
431 return 0;
432 #endif /* !UNCORE_PER_CLUSTER */
433 }
434
435 /*
436 * Per-monitor locks are required to prevent races with the PMI handlers;
437 * configuration from other CPUs is already serialized by monotonic's
438 * per-device lock.
439 */
440
441 static int
442 uncmon_lock(struct uncore_monitor *mon)
443 {
444 int intrs_en = ml_set_interrupts_enabled(FALSE);
445 lck_spin_lock(&mon->um_lock);
446 return intrs_en;
447 }
448
449 static void
450 uncmon_unlock(struct uncore_monitor *mon, int intrs_en)
451 {
452 lck_spin_unlock(&mon->um_lock);
453 (void)ml_set_interrupts_enabled(intrs_en);
454 }
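/*
 * The usual pattern (taken from the callers below) is:
 *
 *     int intrs_en = uncmon_lock(mon);
 *     ... read or reprogram the monitor's registers ...
 *     uncmon_unlock(mon, intrs_en);
 *
 * Interrupts are disabled for the critical section so the uncore PMI handler
 * cannot run on this CPU while the monitor state is mid-update.
 */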
455
456 /*
457 * Helper functions for accessing the hardware -- these require the monitor be
458 * locked to prevent other CPUs' PMI handlers from making local modifications
459 * or updating the counts.
460 */
461
462 #if UNCORE_VERSION >= 2
463 #define UPMCR0_INTEN_POS 20
464 #define UPMCR0_INTGEN_POS 16
465 #else /* UNCORE_VERSION >= 2 */
466 #define UPMCR0_INTEN_POS 12
467 #define UPMCR0_INTGEN_POS 8
468 #endif /* UNCORE_VERSION < 2 */
469 enum {
470 UPMCR0_INTGEN_OFF = 0,
471 /* fast PMIs are only supported on core CPMU */
472 UPMCR0_INTGEN_AIC = 2,
473 UPMCR0_INTGEN_HALT = 3,
474 UPMCR0_INTGEN_FIQ = 4,
475 };
476 /* always enable interrupts for all counters */
477 #define UPMCR0_INTEN (((1ULL << UNCORE_NCTRS) - 1) << UPMCR0_INTEN_POS)
478 /* route uncore PMIs through the FIQ path */
479 #define UPMCR0_INIT (UPMCR0_INTEN | (UPMCR0_INTGEN_FIQ << UPMCR0_INTGEN_POS))
480
481 /*
482 * Turn counting on for the counters set in `enctrmask`, and off otherwise.
483 */
484 static inline void
485 uncmon_set_counting_locked_l(__unused unsigned int monid, uint64_t enctrmask)
486 {
487 /*
488 * UPMCR0 controls which counters are enabled and how interrupts are generated
489 * for overflows.
490 */
491 #define UPMCR0 "s3_7_c15_c0_4"
492 __builtin_arm_wsr64(UPMCR0, UPMCR0_INIT | enctrmask);
493 }
494
495 #if UNCORE_PER_CLUSTER
496
497 /*
498 * Turn counting on for the counters set in `enctrmask`, and off otherwise.
499 */
500 static inline void
501 uncmon_set_counting_locked_r(unsigned int monid, uint64_t enctrmask)
502 {
503 const uintptr_t upmcr0_offset = 0x4180;
504 *(uint64_t *)(cpm_impl[monid] + upmcr0_offset) = UPMCR0_INIT | enctrmask;
505 }
506
507 #endif /* UNCORE_PER_CLUSTER */
508
509 /*
510 * The uncore performance monitoring counters (UPMCs) are 48 bits wide. The
511 * high bit is an overflow bit that triggers a PMI, leaving 47 usable bits.
512 */
513
514 #define UPMC_MAX ((UINT64_C(1) << 48) - 1)
515
516 /*
517 * The `__builtin_arm_{r,w}sr` functions require constant strings, since the
518 * MSR/MRS instructions encode the registers as immediates. Otherwise, this
519 * would be indexing into an array of strings.
520 */
521
522 #define UPMC0 "s3_7_c15_c7_4"
523 #define UPMC1 "s3_7_c15_c8_4"
524 #define UPMC2 "s3_7_c15_c9_4"
525 #define UPMC3 "s3_7_c15_c10_4"
526 #define UPMC4 "s3_7_c15_c11_4"
527 #define UPMC5 "s3_7_c15_c12_4"
528 #define UPMC6 "s3_7_c15_c13_4"
529 #define UPMC7 "s3_7_c15_c14_4"
530 #if UNCORE_NCTRS > 8
531 #define UPMC8 "s3_7_c15_c0_5"
532 #define UPMC9 "s3_7_c15_c1_5"
533 #define UPMC10 "s3_7_c15_c2_5"
534 #define UPMC11 "s3_7_c15_c3_5"
535 #define UPMC12 "s3_7_c15_c4_5"
536 #define UPMC13 "s3_7_c15_c5_5"
537 #define UPMC14 "s3_7_c15_c6_5"
538 #define UPMC15 "s3_7_c15_c7_5"
539 #endif /* UNCORE_NCTRS > 8 */
540
541 #define UPMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
542 X(6, A); X(7, A)
543 #if UNCORE_NCTRS <= 8
544 #define UPMC_ALL(X, A) UPMC_0_7(X, A)
545 #else /* UNCORE_NCTRS <= 8 */
546 #define UPMC_8_15(X, A) X(8, A); X(9, A); X(10, A); X(11, A); X(12, A); \
547 X(13, A); X(14, A); X(15, A)
548 #define UPMC_ALL(X, A) UPMC_0_7(X, A); UPMC_8_15(X, A)
549 #endif /* UNCORE_NCTRS > 8 */
550
551 static inline uint64_t
552 uncmon_read_counter_locked_l(__unused unsigned int monid, unsigned int ctr)
553 {
554 assert(ctr < UNCORE_NCTRS);
555 switch (ctr) {
556 #define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(UPMC ## CTR)
557 UPMC_ALL(UPMC_RD, 0);
558 #undef UPMC_RD
559 default:
560 panic("monotonic: invalid counter read %u", ctr);
561 __builtin_unreachable();
562 }
563 }
564
565 static inline void
566 uncmon_write_counter_locked_l(__unused unsigned int monid, unsigned int ctr,
567 uint64_t count)
568 {
569 assert(count < UPMC_MAX);
570 assert(ctr < UNCORE_NCTRS);
571 switch (ctr) {
572 #define UPMC_WR(CTR, COUNT) case (CTR): \
573 return __builtin_arm_wsr64(UPMC ## CTR, (COUNT))
574 UPMC_ALL(UPMC_WR, count);
575 #undef UPMC_WR
576 default:
577 panic("monotonic: invalid counter write %u", ctr);
578 }
579 }
580
581 #if UNCORE_PER_CLUSTER
582
583 uintptr_t upmc_offs[UNCORE_NCTRS] = {
584 [0] = 0x4100, [1] = 0x4248, [2] = 0x4110, [3] = 0x4250, [4] = 0x4120,
585 [5] = 0x4258, [6] = 0x4130, [7] = 0x4260, [8] = 0x4140, [9] = 0x4268,
586 [10] = 0x4150, [11] = 0x4270, [12] = 0x4160, [13] = 0x4278,
587 [14] = 0x4170, [15] = 0x4280,
588 };
589
590 static inline uint64_t
591 uncmon_read_counter_locked_r(unsigned int mon_id, unsigned int ctr)
592 {
593 assert(mon_id < uncore_nmonitors());
594 assert(ctr < UNCORE_NCTRS);
595 return *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]);
596 }
597
598 static inline void
599 uncmon_write_counter_locked_r(unsigned int mon_id, unsigned int ctr,
600 uint64_t count)
601 {
602 assert(count < UPMC_MAX);
603 assert(ctr < UNCORE_NCTRS);
604 assert(mon_id < uncore_nmonitors());
605 *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]) = count;
606 }
607
608 #endif /* UNCORE_PER_CLUSTER */
609
610 static inline void
611 uncmon_update_locked(unsigned int monid, unsigned int curid, unsigned int ctr)
612 {
613 struct uncore_monitor *mon = &uncore_monitors[monid];
614 uint64_t snap = 0;
615 if (curid == monid) {
616 snap = uncmon_read_counter_locked_l(monid, ctr);
617 } else {
618 #if UNCORE_PER_CLUSTER
619 snap = uncmon_read_counter_locked_r(monid, ctr);
620 #endif /* UNCORE_PER_CLUSTER */
621 }
622 /* counters should increase monotonically */
623 assert(snap >= mon->um_snaps[ctr]);
624 mon->um_counts[ctr] += snap - mon->um_snaps[ctr];
625 mon->um_snaps[ctr] = snap;
626 }
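/*
 * For example, if the previous snapshot for a counter was 1,000 and the
 * hardware now reads 1,500, a delta of 500 is added to um_counts and the
 * snapshot advances to 1,500.  Accumulating deltas lets the narrower hardware
 * counters feed full 64-bit totals.
 */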
627
628 static inline void
629 uncmon_program_events_locked_l(unsigned int monid)
630 {
631 /*
632 * UPMESR[01] is the event selection register that determines which event a
633 * counter will count.
634 */
635 #define UPMESR0 "s3_7_c15_c1_4"
636 CTRL_REG_SET(UPMESR0, uncore_config.uc_events.uce_regs[0]);
637
638 #if UNCORE_NCTRS > 8
639 #define UPMESR1 "s3_7_c15_c11_5"
640 CTRL_REG_SET(UPMESR1, uncore_config.uc_events.uce_regs[1]);
641 #endif /* UNCORE_NCTRS > 8 */
642
643 /*
644 * UPMECM[0123] are the event core masks for each counter -- whether or not
645 * that counter counts events generated by an agent. These default to all ones
646 * (unless a CPU mask was supplied to uncore_add) so the counters count events from all cores.
647 *
648 * The bits are based off the start of the cluster -- e.g. even if a core
649 * has a CPU ID of 4, it might be the first CPU in a cluster. Shift the
650 * registers right by the ID of the first CPU in the cluster.
651 */
652 #define UPMECM0 "s3_7_c15_c3_4"
653 #define UPMECM1 "s3_7_c15_c4_4"
654
655 CTRL_REG_SET(UPMECM0,
656 uncore_config.uc_cpu_masks[monid].uccm_regs[0]);
657 CTRL_REG_SET(UPMECM1,
658 uncore_config.uc_cpu_masks[monid].uccm_regs[1]);
659
660 #if UNCORE_NCTRS > 8
661 #define UPMECM2 "s3_7_c15_c8_5"
662 #define UPMECM3 "s3_7_c15_c9_5"
663
664 CTRL_REG_SET(UPMECM2,
665 uncore_config.uc_cpu_masks[monid].uccm_regs[2]);
666 CTRL_REG_SET(UPMECM3,
667 uncore_config.uc_cpu_masks[monid].uccm_regs[3]);
668 #endif /* UNCORE_NCTRS > 8 */
669 }
670
671 #if UNCORE_PER_CLUSTER
672
673 static inline void
674 uncmon_program_events_locked_r(unsigned int monid)
675 {
676 const uintptr_t upmesr_offs[2] = {[0] = 0x41b0, [1] = 0x41b8, };
677
678 for (unsigned int i = 0; i < sizeof(upmesr_offs) / sizeof(upmesr_offs[0]);
679 i++) {
680 *(uint64_t *)(cpm_impl[monid] + upmesr_offs[i]) =
681 uncore_config.uc_events.uce_regs[i];
682 }
683
684 const uintptr_t upmecm_offs[4] = {
685 [0] = 0x4190, [1] = 0x4198, [2] = 0x41a0, [3] = 0x41a8,
686 };
687
688 for (unsigned int i = 0; i < sizeof(upmecm_offs) / sizeof(upmecm_offs[0]);
689 i++) {
690 *(uint64_t *)(cpm_impl[monid] + upmecm_offs[i]) =
691 uncore_config.uc_cpu_masks[monid].uccm_regs[i];
692 }
693 }
694
695 #endif /* UNCORE_PER_CLUSTER */
696
697 static void
698 uncmon_clear_int_locked_l(__unused unsigned int monid)
699 {
700 __builtin_arm_wsr64(UPMSR, 0);
701 }
702
703 #if UNCORE_PER_CLUSTER
704
705 static void
706 uncmon_clear_int_locked_r(unsigned int monid)
707 {
708 const uintptr_t upmsr_off = 0x41c0;
709 *(uint64_t *)(cpm_impl[monid] + upmsr_off) = 0;
710 }
711
712 #endif /* UNCORE_PER_CLUSTER */
713
714 /*
715 * Get the PMI mask for the provided `monid` -- that is, the bitmap of CPUs
716 * that should be sent PMIs for a particular monitor.
717 */
718 static uint64_t
719 uncmon_get_pmi_mask(unsigned int monid)
720 {
721 uint64_t pmi_mask = uncore_pmi_mask;
722
723 #if UNCORE_PER_CLUSTER
724 pmi_mask &= topology_info->clusters[monid].cpu_mask;
725 #else /* UNCORE_PER_CLUSTER */
726 #pragma unused(monid)
727 #endif /* !UNCORE_PER_CLUSTER */
728
729 return pmi_mask;
730 }
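/*
 * As a hypothetical example on a per-cluster configuration: if
 * uncore_pmi_mask is 0x11 and cluster 1's cpu_mask is 0xf0, the PMI mask for
 * monitor 1 is 0x10 -- only CPU 4 is interrupted for that cluster's monitor.
 */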
731
732 /*
733 * Initialization routines for the uncore counters.
734 */
735
736 static void
737 uncmon_init_locked_l(unsigned int monid)
738 {
739 /*
740 * UPMPCM defines the PMI core mask for the UPMCs -- which cores should
741 * receive interrupts on overflow.
742 */
743 CTRL_REG_SET(UPMPCM, uncmon_get_pmi_mask(monid));
744 uncmon_set_counting_locked_l(monid,
745 mt_uncore_enabled ? uncore_active_ctrs : 0);
746 }
747
748 #if UNCORE_PER_CLUSTER
749
750 static uintptr_t acc_impl[MAX_NMONITORS] = {};
751
752 static void
753 uncmon_init_locked_r(unsigned int monid)
754 {
755 const uintptr_t upmpcm_off = 0x1010;
756
757 *(uint64_t *)(acc_impl[monid] + upmpcm_off) = uncmon_get_pmi_mask(monid);
758 uncmon_set_counting_locked_r(monid,
759 mt_uncore_enabled ? uncore_active_ctrs : 0);
760 }
761
762 #endif /* UNCORE_PER_CLUSTER */
763
764 /*
765 * Initialize the uncore device for monotonic.
766 */
767 static int
768 uncore_init(__unused mt_device_t dev)
769 {
770 #if HAS_UNCORE_CTRS
771 assert(MT_NDEVS > 0);
772 mt_devices[MT_NDEVS - 1].mtd_nmonitors = (uint8_t)uncore_nmonitors();
773 #endif
774
775 #if DEVELOPMENT || DEBUG
776 /*
777 * Development and debug kernels observe the `uncore_pmi_mask` boot-arg,
778 * allowing PMIs to be routed to the CPUs present in the supplied bitmap.
779 * Do some sanity checks on the value provided.
780 */
781 bool parsed_arg = PE_parse_boot_argn("uncore_pmi_mask", &uncore_pmi_mask,
782 sizeof(uncore_pmi_mask));
783 if (parsed_arg) {
784 #if UNCORE_PER_CLUSTER
785 if (__builtin_popcount(uncore_pmi_mask) != (int)uncore_nmonitors()) {
786 panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
787 }
788 for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
789 if (__builtin_popcountll(uncmon_get_pmi_mask(i)) != 1) {
790 panic("monotonic: invalid uncore PMI CPU for cluster %d in mask 0x%x",
791 i, uncore_pmi_mask);
792 }
793 }
794 #else /* UNCORE_PER_CLUSTER */
795 if (__builtin_popcount(uncore_pmi_mask) != 1) {
796 panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
797 }
798 #endif /* !UNCORE_PER_CLUSTER */
799 } else
800 #endif /* DEVELOPMENT || DEBUG */
801 {
802 #if UNCORE_PER_CLUSTER
803 for (unsigned int i = 0; i < topology_info->num_clusters; i++) {
804 uncore_pmi_mask |= 1ULL << topology_info->clusters[i].first_cpu_id;
805 }
806 #else /* UNCORE_PER_CLUSTER */
807 /* arbitrarily route to core 0 */
808 uncore_pmi_mask |= 1;
809 #endif /* !UNCORE_PER_CLUSTER */
810 }
811 assert(uncore_pmi_mask != 0);
812
813 unsigned int curmonid = uncmon_get_curid();
814
815 for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
816 #if UNCORE_PER_CLUSTER
817 ml_topology_cluster_t *cluster = &topology_info->clusters[monid];
818 cpm_impl[monid] = (uintptr_t)cluster->cpm_IMPL_regs;
819 acc_impl[monid] = (uintptr_t)cluster->acc_IMPL_regs;
820 assert(cpm_impl[monid] != 0 && acc_impl[monid] != 0);
821 #endif /* UNCORE_PER_CLUSTER */
822
823 struct uncore_monitor *mon = &uncore_monitors[monid];
824 lck_spin_init(&mon->um_lock, mt_lock_grp, NULL);
825
826 int intrs_en = uncmon_lock(mon);
827 if (monid != curmonid) {
828 #if UNCORE_PER_CLUSTER
829 uncmon_init_locked_r(monid);
830 #endif /* UNCORE_PER_CLUSTER */
831 } else {
832 uncmon_init_locked_l(monid);
833 }
834 uncmon_unlock(mon, intrs_en);
835 }
836
837 mt_uncore_initted = true;
838
839 return 0;
840 }
841
842 /*
843 * Support for monotonic's mtd_read function.
844 */
845
846 static void
847 uncmon_read_all_counters(unsigned int monid, unsigned int curmonid,
848 uint64_t ctr_mask, uint64_t *counts)
849 {
850 struct uncore_monitor *mon = &uncore_monitors[monid];
851
852 int intrs_en = uncmon_lock(mon);
853
854 for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
855 if (ctr_mask & (1ULL << ctr)) {
856 uncmon_update_locked(monid, curmonid, ctr);
857 counts[ctr] = mon->um_counts[ctr];
858 }
859 }
860
861 uncmon_unlock(mon, intrs_en);
862 }
863
864 /*
865 * Read all monitors' counters.
866 */
867 static int
868 uncore_read(uint64_t ctr_mask, uint64_t *counts_out)
869 {
870 assert(ctr_mask != 0);
871 assert(counts_out != NULL);
872
873 if (!uncore_active_ctrs) {
874 return EPWROFF;
875 }
876 if (ctr_mask & ~uncore_active_ctrs) {
877 return EINVAL;
878 }
879
880 unsigned int curmonid = uncmon_get_curid();
881 for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
882 /*
883 * Find this monitor's starting offset into the `counts_out` array.
884 */
885 uint64_t *counts = counts_out + (UNCORE_NCTRS * monid);
886
887 uncmon_read_all_counters(monid, curmonid, ctr_mask, counts);
888 }
889
890 return 0;
891 }
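/*
 * The caller's buffer is laid out monitor-major: monitor `m`'s counter `c`
 * lands at counts_out[m * UNCORE_NCTRS + c], so the buffer must hold
 * uncore_nmonitors() * UNCORE_NCTRS entries.
 */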
892
893 /*
894 * Support for monotonic's mtd_add function.
895 */
896
897 /*
898 * Add an event to the current uncore configuration. This doesn't take effect
899 * until the counters are enabled again, so there's no need to involve the
900 * monitors.
901 */
902 static int
903 uncore_add(struct monotonic_config *config, uint32_t *ctr_out)
904 {
905 if (mt_uncore_enabled) {
906 return EBUSY;
907 }
908
909 uint32_t available = ~uncore_active_ctrs & config->allowed_ctr_mask;
910
911 if (available == 0) {
912 return ENOSPC;
913 }
914
915 uint32_t valid_ctrs = (UINT32_C(1) << UNCORE_NCTRS) - 1;
916 if ((available & valid_ctrs) == 0) {
917 return E2BIG;
918 }
919
920 uint32_t ctr = __builtin_ffsll(available) - 1;
921
922 uncore_active_ctrs |= UINT64_C(1) << ctr;
923 uncore_config.uc_events.uce_ctrs[ctr] = (uint8_t)config->event;
924 uint64_t cpu_mask = UINT64_MAX;
925 if (config->cpu_mask != 0) {
926 cpu_mask = config->cpu_mask;
927 }
928 for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
929 #if UNCORE_PER_CLUSTER
930 const unsigned int shift = topology_info->clusters[i].first_cpu_id;
931 #else /* UNCORE_PER_CLUSTER */
932 const unsigned int shift = 0;
933 #endif /* !UNCORE_PER_CLUSTER */
934 uncore_config.uc_cpu_masks[i].uccm_masks[ctr] = (uint16_t)(cpu_mask >> shift);
935 }
936
937 *ctr_out = ctr;
938 return 0;
939 }
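/*
 * A hypothetical example of the CPU-mask adjustment above: for a cluster
 * whose first CPU ID is 4, a config->cpu_mask of 0x30 (CPUs 4 and 5) is
 * stored as 0x3 in that monitor's uccm_masks entry, since the UPMECM bits
 * are numbered from the start of the cluster.
 */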
940
941 /*
942 * Support for monotonic's mtd_reset function.
943 */
944
945 /*
946 * Reset all configuration and disable the counters if they're currently
947 * counting.
948 */
949 static void
950 uncore_reset(void)
951 {
952 mt_uncore_enabled = false;
953
954 unsigned int curmonid = uncmon_get_curid();
955
956 for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
957 struct uncore_monitor *mon = &uncore_monitors[monid];
958 bool remote = monid != curmonid;
959
960 int intrs_en = uncmon_lock(mon);
961 if (remote) {
962 #if UNCORE_PER_CLUSTER
963 uncmon_set_counting_locked_r(monid, 0);
964 #endif /* UNCORE_PER_CLUSTER */
965 } else {
966 uncmon_set_counting_locked_l(monid, 0);
967 }
968
969 for (int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
970 if (uncore_active_ctrs & (1U << ctr)) {
971 if (remote) {
972 #if UNCORE_PER_CLUSTER
973 uncmon_write_counter_locked_r(monid, ctr, 0);
974 #endif /* UNCORE_PER_CLUSTER */
975 } else {
976 uncmon_write_counter_locked_l(monid, ctr, 0);
977 }
978 }
979 }
980
981 memset(&mon->um_snaps, 0, sizeof(mon->um_snaps));
982 memset(&mon->um_counts, 0, sizeof(mon->um_counts));
983 if (remote) {
984 #if UNCORE_PER_CLUSTER
985 uncmon_clear_int_locked_r(monid);
986 #endif /* UNCORE_PER_CLUSTER */
987 } else {
988 uncmon_clear_int_locked_l(monid);
989 }
990
991 uncmon_unlock(mon, intrs_en);
992 }
993
994 uncore_active_ctrs = 0;
995 memset(&uncore_config, 0, sizeof(uncore_config));
996
997 for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
998 struct uncore_monitor *mon = &uncore_monitors[monid];
999 bool remote = monid != curmonid;
1000
1001 int intrs_en = uncmon_lock(mon);
1002 if (remote) {
1003 #if UNCORE_PER_CLUSTER
1004 uncmon_program_events_locked_r(monid);
1005 #endif /* UNCORE_PER_CLUSTER */
1006 } else {
1007 uncmon_program_events_locked_l(monid);
1008 }
1009 uncmon_unlock(mon, intrs_en);
1010 }
1011 }
1012
1013 /*
1014 * Support for monotonic's mtd_enable function.
1015 */
1016
1017 static void
1018 uncmon_set_enabled_l(unsigned int monid, bool enable)
1019 {
1020 struct uncore_monitor *mon = &uncore_monitors[monid];
1021 int intrs_en = uncmon_lock(mon);
1022
1023 if (enable) {
1024 uncmon_program_events_locked_l(monid);
1025 uncmon_set_counting_locked_l(monid, uncore_active_ctrs);
1026 } else {
1027 uncmon_set_counting_locked_l(monid, 0);
1028 }
1029
1030 uncmon_unlock(mon, intrs_en);
1031 }
1032
1033 #if UNCORE_PER_CLUSTER
1034
1035 static void
1036 uncmon_set_enabled_r(unsigned int monid, bool enable)
1037 {
1038 struct uncore_monitor *mon = &uncore_monitors[monid];
1039 int intrs_en = uncmon_lock(mon);
1040
1041 if (enable) {
1042 uncmon_program_events_locked_r(monid);
1043 uncmon_set_counting_locked_r(monid, uncore_active_ctrs);
1044 } else {
1045 uncmon_set_counting_locked_r(monid, 0);
1046 }
1047
1048 uncmon_unlock(mon, intrs_en);
1049 }
1050
1051 #endif /* UNCORE_PER_CLUSTER */
1052
1053 static void
1054 uncore_set_enabled(bool enable)
1055 {
1056 mt_uncore_enabled = enable;
1057
1058 unsigned int curmonid = uncmon_get_curid();
1059 for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
1060 if (monid != curmonid) {
1061 #if UNCORE_PER_CLUSTER
1062 uncmon_set_enabled_r(monid, enable);
1063 #endif /* UNCORE_PER_CLUSTER */
1064 } else {
1065 uncmon_set_enabled_l(monid, enable);
1066 }
1067 }
1068 }
1069
1070 /*
1071 * Hooks in the machine layer.
1072 */
1073
1074 static void
1075 uncore_fiq(uint64_t upmsr)
1076 {
1077 /*
1078 * Determine which counters overflowed.
1079 */
1080 uint64_t disable_ctr_mask = (upmsr & UPMSR_OVF_MASK) >> UPMSR_OVF_POS;
1081 /* should not receive interrupts from inactive counters */
1082 assert(!(disable_ctr_mask & ~uncore_active_ctrs));
1083
1084 unsigned int monid = uncmon_get_curid();
1085 struct uncore_monitor *mon = &uncore_monitors[monid];
1086
1087 int intrs_en = uncmon_lock(mon);
1088
1089 /*
1090 * Disable any counters that overflowed.
1091 */
1092 uncmon_set_counting_locked_l(monid,
1093 uncore_active_ctrs & ~disable_ctr_mask);
1094
1095 /*
1096 * With the overflowing counters disabled, capture their counts and reset
1097 * the UPMCs and their snapshots to 0.
1098 */
1099 for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
1100 if (UPMSR_OVF(upmsr, ctr)) {
1101 uncmon_update_locked(monid, monid, ctr);
1102 mon->um_snaps[ctr] = 0;
1103 uncmon_write_counter_locked_l(monid, ctr, 0);
1104 }
1105 }
1106
1107 /*
1108 * Acknowledge the interrupt, now that any overflowed PMCs have been reset.
1109 */
1110 uncmon_clear_int_locked_l(monid);
1111
1112 /*
1113 * Re-enable all active counters.
1114 */
1115 uncmon_set_counting_locked_l(monid, uncore_active_ctrs);
1116
1117 uncmon_unlock(mon, intrs_en);
1118 }
1119
1120 static void
1121 uncore_save(void)
1122 {
1123 if (!uncore_active_ctrs) {
1124 return;
1125 }
1126
1127 unsigned int curmonid = uncmon_get_curid();
1128
1129 for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
1130 struct uncore_monitor *mon = &uncore_monitors[monid];
1131 int intrs_en = uncmon_lock(mon);
1132
1133 if (mt_uncore_enabled) {
1134 if (monid != curmonid) {
1135 #if UNCORE_PER_CLUSTER
1136 uncmon_set_counting_locked_r(monid, 0);
1137 #endif /* UNCORE_PER_CLUSTER */
1138 } else {
1139 uncmon_set_counting_locked_l(monid, 0);
1140 }
1141 }
1142
1143 for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
1144 if (uncore_active_ctrs & (1U << ctr)) {
1145 uncmon_update_locked(monid, curmonid, ctr);
1146 }
1147 }
1148
1149 mon->um_sleeping = true;
1150 uncmon_unlock(mon, intrs_en);
1151 }
1152 }
1153
1154 static void
1155 uncore_restore(void)
1156 {
1157 if (!uncore_active_ctrs) {
1158 return;
1159 }
1160 unsigned int curmonid = uncmon_get_curid();
1161
1162 struct uncore_monitor *mon = &uncore_monitors[curmonid];
1163 int intrs_en = uncmon_lock(mon);
1164 if (!mon->um_sleeping) {
1165 goto out;
1166 }
1167
1168 for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
1169 if (uncore_active_ctrs & (1U << ctr)) {
1170 uncmon_write_counter_locked_l(curmonid, ctr, mon->um_snaps[ctr]);
1171 }
1172 }
1173 uncmon_program_events_locked_l(curmonid);
1174 uncmon_init_locked_l(curmonid);
1175 mon->um_sleeping = false;
1176
1177 out:
1178 uncmon_unlock(mon, intrs_en);
1179 }
1180
1181 #endif /* HAS_UNCORE_CTRS */
1182
1183 #pragma mark common hooks
1184
1185 void
1186 mt_early_init(void)
1187 {
1188 topology_info = ml_get_topology_info();
1189 }
1190
1191 void
1192 mt_cpu_idle(cpu_data_t *cpu)
1193 {
1194 core_idle(cpu);
1195 }
1196
1197 void
1198 mt_cpu_run(cpu_data_t *cpu)
1199 {
1200 struct mt_cpu *mtc;
1201
1202 assert(cpu != NULL);
1203 assert(ml_get_interrupts_enabled() == FALSE);
1204
1205 mtc = &cpu->cpu_monotonic;
1206
1207 for (int i = 0; i < MT_CORE_NFIXED; i++) {
1208 mt_core_set_snap(i, mtc->mtc_snaps[i]);
1209 }
1210
1211 /* re-enable the counters */
1212 core_init_execution_modes();
1213
1214 core_set_enabled();
1215 }
1216
1217 void
1218 mt_cpu_down(cpu_data_t *cpu)
1219 {
1220 mt_cpu_idle(cpu);
1221 }
1222
1223 void
1224 mt_cpu_up(cpu_data_t *cpu)
1225 {
1226 mt_cpu_run(cpu);
1227 }
1228
1229 void
1230 mt_sleep(void)
1231 {
1232 #if HAS_UNCORE_CTRS
1233 uncore_save();
1234 #endif /* HAS_UNCORE_CTRS */
1235 }
1236
1237 void
1238 mt_wake_per_core(void)
1239 {
1240 #if HAS_UNCORE_CTRS
1241 if (mt_uncore_initted) {
1242 uncore_restore();
1243 }
1244 #endif /* HAS_UNCORE_CTRS */
1245 }
1246
1247 uint64_t
1248 mt_count_pmis(void)
1249 {
1250 uint64_t npmis = 0;
1251 for (unsigned int i = 0; i < topology_info->num_cpus; i++) {
1252 cpu_data_t *cpu = (cpu_data_t *)CpuDataEntries[topology_info->cpus[i].cpu_id].cpu_data_vaddr;
1253 npmis += cpu->cpu_monotonic.mtc_npmis;
1254 }
1255 return npmis;
1256 }
1257
1258 static void
1259 mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
1260 {
1261 assert(cpu != NULL);
1262 assert(ml_get_interrupts_enabled() == FALSE);
1263
1264 __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
1265 /*
1266 * Ensure the CPMU has flushed any increments at this point, so PMSR is up
1267 * to date.
1268 */
1269 __builtin_arm_isb(ISB_SY);
1270
1271 cpu->cpu_monotonic.mtc_npmis += 1;
1272 cpu->cpu_stat.pmi_cnt_wake += 1;
1273
1274 #if MONOTONIC_DEBUG
1275 if (!PMCR0_PMI(pmcr0)) {
1276 kprintf("monotonic: mt_cpu_pmi but no PMI (PMCR0 = %#llx)\n",
1277 pmcr0);
1278 }
1279 #else /* MONOTONIC_DEBUG */
1280 #pragma unused(pmcr0)
1281 #endif /* !MONOTONIC_DEBUG */
1282
1283 uint64_t pmsr = __builtin_arm_rsr64(PMSR);
1284
1285 #if MONOTONIC_DEBUG
1286 printf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx\n",
1287 cpu_number(), pmsr, pmcr0);
1288 #endif /* MONOTONIC_DEBUG */
1289
1290 #if MACH_ASSERT
1291 uint64_t handled = 0;
1292 #endif /* MACH_ASSERT */
1293
1294 /*
1295 * monotonic handles any fixed counter PMIs.
1296 */
1297 for (unsigned int i = 0; i < MT_CORE_NFIXED; i++) {
1298 if ((pmsr & PMSR_OVF(i)) == 0) {
1299 continue;
1300 }
1301
1302 #if MACH_ASSERT
1303 handled |= 1ULL << i;
1304 #endif /* MACH_ASSERT */
1305 uint64_t count = mt_cpu_update_count(cpu, i);
1306 cpu->cpu_monotonic.mtc_counts[i] += count;
1307 mt_core_set_snap(i, mt_core_reset_values[i]);
1308 cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];
1309
1310 if (mt_microstackshots && mt_microstackshot_ctr == i) {
1311 bool user_mode = false;
1312 arm_saved_state_t *state = get_user_regs(current_thread());
1313 if (state) {
1314 user_mode = PSR64_IS_USER(get_saved_state_cpsr(state));
1315 }
1316 KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 1),
1317 mt_microstackshot_ctr, user_mode);
1318 mt_microstackshot_pmi_handler(user_mode, mt_microstackshot_ctx);
1319 } else if (mt_debug) {
1320 KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 2),
1321 i, count);
1322 }
1323 }
1324
1325 /*
1326 * KPC handles the configurable counter PMIs.
1327 */
1328 for (unsigned int i = MT_CORE_NFIXED; i < CORE_NCTRS; i++) {
1329 if (pmsr & PMSR_OVF(i)) {
1330 #if MACH_ASSERT
1331 handled |= 1ULL << i;
1332 #endif /* MACH_ASSERT */
1333 extern void kpc_pmi_handler(unsigned int ctr);
1334 kpc_pmi_handler(i);
1335 }
1336 }
1337
1338 #if MACH_ASSERT
1339 uint64_t pmsr_after_handling = __builtin_arm_rsr64(PMSR);
1340 if (pmsr_after_handling != 0) {
1341 unsigned int first_ctr_ovf = __builtin_ffsll(pmsr_after_handling) - 1;
1342 uint64_t count = 0;
1343 const char *extra = "";
1344 if (first_ctr_ovf >= CORE_NCTRS) {
1345 extra = " (invalid counter)";
1346 } else {
1347 count = mt_core_snap(first_ctr_ovf);
1348 }
1349
1350 panic("monotonic: PMI status not cleared on exit from handler, "
1351     "PMSR = 0x%llx -> 0x%llx after handling, handled 0x%llx, "
1352 "PMCR0 = 0x%llx, PMC%d = 0x%llx%s", pmsr, pmsr_after_handling,
1353 handled, __builtin_arm_rsr64(PMCR0), first_ctr_ovf, count, extra);
1354 }
1355 #endif /* MACH_ASSERT */
1356
1357 core_set_enabled();
1358 }
1359
1360 #if CPMU_AIC_PMI
1361 void
1362 mt_cpmu_aic_pmi(cpu_id_t source)
1363 {
1364 struct cpu_data *curcpu = getCpuDatap();
1365 if (source != curcpu->interrupt_nub) {
1366 panic("monotonic: PMI from IOCPU %p delivered to %p", source,
1367 curcpu->interrupt_nub);
1368 }
1369 mt_cpu_pmi(curcpu, __builtin_arm_rsr64(PMCR0));
1370 }
1371 #endif /* CPMU_AIC_PMI */
1372
1373 void
1374 mt_fiq(void *cpu, uint64_t pmcr0, uint64_t upmsr)
1375 {
1376 #if CPMU_AIC_PMI
1377 #pragma unused(cpu, pmcr0)
1378 #else /* CPMU_AIC_PMI */
1379 mt_cpu_pmi(cpu, pmcr0);
1380 #endif /* !CPMU_AIC_PMI */
1381
1382 #if HAS_UNCORE_CTRS
1383 uncore_fiq(upmsr);
1384 #else /* HAS_UNCORE_CTRS */
1385 #pragma unused(upmsr)
1386 #endif /* !HAS_UNCORE_CTRS */
1387 }
1388
1389 static uint32_t mt_xc_sync;
1390
1391 static void
1392 mt_microstackshot_start_remote(__unused void *arg)
1393 {
1394 cpu_data_t *cpu = getCpuDatap();
1395
1396 __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
1397
1398 for (int i = 0; i < MT_CORE_NFIXED; i++) {
1399 uint64_t count = mt_cpu_update_count(cpu, i);
1400 cpu->cpu_monotonic.mtc_counts[i] += count;
1401 mt_core_set_snap(i, mt_core_reset_values[i]);
1402 cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];
1403 }
1404
1405 core_set_enabled();
1406
1407 if (os_atomic_dec(&mt_xc_sync, relaxed) == 0) {
1408 thread_wakeup((event_t)&mt_xc_sync);
1409 }
1410 }
1411
1412 int
1413 mt_microstackshot_start_arch(uint64_t period)
1414 {
1415 uint64_t reset_value = 0;
1416 int ovf = os_sub_overflow(CTR_MAX, period, &reset_value);
1417 if (ovf) {
1418 return ERANGE;
1419 }
1420
1421 mt_core_reset_values[mt_microstackshot_ctr] = reset_value;
1422 cpu_broadcast_xcall(&mt_xc_sync, TRUE, mt_microstackshot_start_remote,
1423 mt_microstackshot_start_remote /* cannot pass NULL */);
1424 return 0;
1425 }
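/*
 * For example, a period of 1,000,000 yields a reset value of
 * CTR_MAX - 1,000,000; the counter being sampled is reloaded with that value
 * after each PMI, so it overflows -- and triggers the next microstackshot --
 * roughly every 1,000,000 events.
 */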
1426
1427 #pragma mark dev nodes
1428
1429 struct mt_device mt_devices[] = {
1430 [0] = {
1431 .mtd_name = "core",
1432 .mtd_init = core_init,
1433 },
1434 #if HAS_UNCORE_CTRS
1435 [1] = {
1436 .mtd_name = "uncore",
1437 .mtd_init = uncore_init,
1438 .mtd_add = uncore_add,
1439 .mtd_reset = uncore_reset,
1440 .mtd_enable = uncore_set_enabled,
1441 .mtd_read = uncore_read,
1442
1443 .mtd_ncounters = UNCORE_NCTRS,
1444 }
1445 #endif /* HAS_UNCORE_CTRS */
1446 };
1447
1448 static_assert(
1449 (sizeof(mt_devices) / sizeof(mt_devices[0])) == MT_NDEVS,
1450 "MT_NDEVS macro should be same as the length of mt_devices");