/*
 * Copyright (c) 2017-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <arm/cpu_data_internal.h>
#include <arm/machine_routines.h>
#include <arm64/monotonic.h>
#include <kern/assert.h>
#include <kern/debug.h> /* panic */
#include <kern/monotonic.h>
#include <machine/atomic.h>
#include <machine/limits.h> /* CHAR_BIT */
#include <os/overflow.h>
#include <pexpert/arm64/board_config.h>
#include <pexpert/device_tree.h> /* SecureDTFindEntry */
#include <pexpert/pexpert.h>
#include <stdatomic.h>
#include <sys/errno.h>
#include <sys/monotonic.h>
/*
 * Ensure that control registers read back what was written under MACH_ASSERT
 * kernels.
 *
 * A static inline function cannot be used due to passing the register through
 * the builtin -- it requires a constant string as its first argument, since
 * MSR registers are encoded as an immediate in the instruction.
 */
#if MACH_ASSERT
#define CTRL_REG_SET(reg, val) do { \
	__builtin_arm_wsr64((reg), (val)); \
	uint64_t __check_reg = __builtin_arm_rsr64((reg)); \
	if (__check_reg != (val)) { \
		panic("value written to %s was not read back (wrote %llx, read %llx)", \
		    #reg, (val), __check_reg); \
	} \
} while (0)
#else /* MACH_ASSERT */
#define CTRL_REG_SET(reg, val) __builtin_arm_wsr64((reg), (val))
#endif /* MACH_ASSERT */
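
/*
 * Usage note (added commentary): CTRL_REG_SET is used below for the uncore
 * configuration registers, e.g.:
 *
 *     CTRL_REG_SET("UPMESR0_EL1", uncore_config.uc_events.uce_regs[0]);
 *
 * On MACH_ASSERT kernels the macro writes the register, reads it back, and
 * panics if the value read differs from the value written; on release kernels
 * it reduces to a plain __builtin_arm_wsr64().
 */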

#pragma mark core counters

bool mt_core_supported = true;

static const ml_topology_info_t *topology_info;

/*
 * PMC[0-1] are the 48-bit fixed counters -- PMC0 is cycles and PMC1 is
 * instructions (see arm64/monotonic.h).
 *
 * PMC2+ are currently handled by kpc.
 */
#define PMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
    X(6, A); X(7, A)

#if CORE_NCTRS > 8
#define PMC_8_9(X, A) X(8, A); X(9, A)
#else // CORE_NCTRS > 8
#define PMC_8_9(X, A)
#endif // CORE_NCTRS > 8

#define PMC_ALL(X, A) PMC_0_7(X, A); PMC_8_9(X, A)
#define CTR_MAX ((UINT64_C(1) << 47) - 1)
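
/*
 * Note (added commentary): because the fixed counters count up and provide 47
 * usable bits, a PMI every `period` events is arranged by pre-loading a
 * counter with CTR_MAX - period, as mt_microstackshot_start_arch does below:
 *
 *     uint64_t reset_value = 0;
 *     int ovf = os_sub_overflow(CTR_MAX, period, &reset_value);
 *
 * The counter then overflows -- raising a PMI -- after `period` further
 * increments.
 */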

/*
 * PMC0's offset into a core's PIO range.
 *
 * This allows cores to remotely query another core's counters.
 */
#define PIO_PMC0_OFFSET (0x200)

/*
 * The offset of the counter in the configuration registers. Post-Hurricane
 * devices have additional counters that need a larger shift than the original
 * counters.
 *
 * XXX For now, just support the lower-numbered counters.
 */
#define CTR_POS(CTR) (CTR)

/*
 * PMCR0 is the main control register for the performance monitor. It
 * controls whether the counters are enabled, how they deliver interrupts, and
 * other features.
 */
#define PMCR0_CTR_EN(CTR) (UINT64_C(1) << CTR_POS(CTR))
#define PMCR0_FIXED_EN (PMCR0_CTR_EN(CYCLES) | PMCR0_CTR_EN(INSTRS))
/* how interrupts are delivered on a PMI */
enum {
	PMCR0_INTGEN_OFF = 0,
	PMCR0_INTGEN_PMI = 1,
	PMCR0_INTGEN_AIC = 2,
	PMCR0_INTGEN_HALT = 3,
	PMCR0_INTGEN_FIQ = 4,
};
#define PMCR0_INTGEN_SET(X) ((uint64_t)(X) << 8)

#if CPMU_AIC_PMI
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_AIC)
#else /* CPMU_AIC_PMI */
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ)
#endif /* !CPMU_AIC_PMI */

#define PMCR0_PMI_SHIFT (12)
#define PMCR0_CTR_GE8_PMI_SHIFT (44)
#define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (PMCR0_PMI_SHIFT + CTR_POS(CTR)))
/* fixed counters are always counting */
#define PMCR0_PMI_INIT (PMCR0_PMI_EN(CYCLES) | PMCR0_PMI_EN(INSTRS))
/* disable counting on a PMI */
#define PMCR0_DISCNT_EN (UINT64_C(1) << 20)
/* block PMIs until ERET retires */
#define PMCR0_WFRFE_EN (UINT64_C(1) << 22)
/* count global (not just core-local) L2C events */
#define PMCR0_L2CGLOBAL_EN (UINT64_C(1) << 23)
/* user mode access to configuration registers */
#define PMCR0_USEREN_EN (UINT64_C(1) << 30)
#define PMCR0_CTR_GE8_EN_SHIFT (32)

#define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT)

/*
 * PMCR1 controls which execution modes count events.
 */
#define PMCR1_EL0A32_EN(CTR) (UINT64_C(1) << (0 + CTR_POS(CTR)))
#define PMCR1_EL0A64_EN(CTR) (UINT64_C(1) << (8 + CTR_POS(CTR)))
#define PMCR1_EL1A64_EN(CTR) (UINT64_C(1) << (16 + CTR_POS(CTR)))
/* PMCR1_EL3A64 is not supported on systems with no monitor */
#if defined(APPLEHURRICANE)
#define PMCR1_EL3A64_EN(CTR) UINT64_C(0)
#else
#define PMCR1_EL3A64_EN(CTR) (UINT64_C(1) << (24 + CTR_POS(CTR)))
#endif
#define PMCR1_ALL_EN(CTR) (PMCR1_EL0A32_EN(CTR) | PMCR1_EL0A64_EN(CTR) | \
    PMCR1_EL1A64_EN(CTR) | PMCR1_EL3A64_EN(CTR))

/* fixed counters always count in all modes */
#define PMCR1_INIT (PMCR1_ALL_EN(CYCLES) | PMCR1_ALL_EN(INSTRS))

static void
core_init_execution_modes(void)
{
	uint64_t pmcr1;

	pmcr1 = __builtin_arm_rsr64("PMCR1_EL1");
	pmcr1 |= PMCR1_INIT;
	__builtin_arm_wsr64("PMCR1_EL1", pmcr1);
}

#define PMSR_OVF(CTR) (1ULL << (CTR))
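
/*
 * Note (added commentary): PMSR_EL1 reports which counters have overflowed,
 * one bit per counter.  The PMI handler later in this file walks the fixed
 * counters with this bit test, roughly:
 *
 *     uint64_t pmsr = __builtin_arm_rsr64("PMSR_EL1");
 *     for (unsigned int i = 0; i < MT_CORE_NFIXED; i++) {
 *         if (pmsr & PMSR_OVF(i)) {
 *             // capture the overflowed counter and reload its snapshot
 *         }
 *     }
 */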

static int
core_init(__unused mt_device_t dev)
{
	/* the dev node interface to the core counters is still unsupported */
	return ENOTSUP;
}

struct mt_cpu *
mt_cur_cpu(void)
{
	return &getCpuDatap()->cpu_monotonic;
}

uint64_t
mt_core_snap(unsigned int ctr)
{
	switch (ctr) {
#define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(__MSR_STR(PMC ## CTR))
		PMC_ALL(PMC_RD, 0);
#undef PMC_RD
	default:
		panic("monotonic: invalid core counter read: %u", ctr);
		__builtin_unreachable();
	}
}

void
mt_core_set_snap(unsigned int ctr, uint64_t count)
{
	switch (ctr) {
	case 0:
		__builtin_arm_wsr64("PMC0", count);
		break;
	case 1:
		__builtin_arm_wsr64("PMC1", count);
		break;
	default:
		panic("monotonic: invalid core counter %u write %llu", ctr, count);
		__builtin_unreachable();
	}
}

void
core_set_enabled(void)
{
	uint64_t pmcr0 = __builtin_arm_rsr64("PMCR0_EL1");
	pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN;

	if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
		uint64_t kpc_ctrs = kpc_get_configurable_pmc_mask(
			KPC_CLASS_CONFIGURABLE_MASK) << MT_CORE_NFIXED;
#if KPC_ARM64_CONFIGURABLE_COUNT > 6
		uint64_t ctrs_ge8 = kpc_ctrs >> 8;
		pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_EN_SHIFT;
		pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_PMI_SHIFT;
		kpc_ctrs &= (1ULL << 8) - 1;
#endif /* KPC_ARM64_CONFIGURABLE_COUNT > 6 */
		kpc_ctrs |= kpc_ctrs << PMCR0_PMI_SHIFT;
		pmcr0 |= kpc_ctrs;
	}

	__builtin_arm_wsr64("PMCR0_EL1", pmcr0);
#if MACH_ASSERT
	/*
	 * Only check for the values that were ORed in.
	 */
	uint64_t pmcr0_check = __builtin_arm_rsr64("PMCR0_EL1");
	if ((pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN)) != (PMCR0_INIT | PMCR0_FIXED_EN)) {
		panic("monotonic: hardware ignored enable (read %llx, wrote %llx)",
		    pmcr0_check, pmcr0);
	}
#endif /* MACH_ASSERT */
}

static void
core_idle(__unused cpu_data_t *cpu)
{
	assert(ml_get_interrupts_enabled() == FALSE);

#if DEBUG
	uint64_t pmcr0 = __builtin_arm_rsr64("PMCR0_EL1");
	if ((pmcr0 & PMCR0_FIXED_EN) == 0) {
		panic("monotonic: counters disabled before idling, pmcr0 = 0x%llx\n", pmcr0);
	}
	uint64_t pmcr1 = __builtin_arm_rsr64("PMCR1_EL1");
	if ((pmcr1 & PMCR1_INIT) == 0) {
		panic("monotonic: counter modes disabled before idling, pmcr1 = 0x%llx\n", pmcr1);
	}
#endif /* DEBUG */

	/* disable counters before updating */
	__builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);

	mt_update_fixed_counts();
}

#pragma mark uncore performance monitor

#if HAS_UNCORE_CTRS

static bool mt_uncore_initted = false;

/*
 * Uncore Performance Monitor
 *
 * Uncore performance monitors provide event-counting for the last-level caches
 * (LLCs). Each LLC has its own uncore performance monitor, which can only be
 * accessed by cores that use that LLC. Like the core performance monitoring
 * unit, uncore counters are configured globally. If there is more than one
 * LLC on the system, PIO reads must be used to satisfy uncore requests (using
 * the `_r` remote variants of the access functions). Otherwise, local MSRs
 * suffice (using the `_l` local variants of the access functions).
 */

#if UNCORE_PER_CLUSTER
#define MAX_NMONITORS MAX_CPU_CLUSTERS
static uintptr_t cpm_impl[MAX_NMONITORS] = {};
#else /* UNCORE_PER_CLUSTER */
#define MAX_NMONITORS (1)
#endif /* UNCORE_PER_CLUSTER */

#if UNCORE_VERSION >= 2
/*
 * V2 uncore monitors feature a CTI mechanism -- the second bit of UPMSR is
 * used to track if a CTI has been triggered due to an overflow.
 */
#define UPMSR_OVF_POS 2
#else /* UNCORE_VERSION >= 2 */
#define UPMSR_OVF_POS 1
#endif /* UNCORE_VERSION < 2 */
#define UPMSR_OVF(R, CTR) ((R) >> ((CTR) + UPMSR_OVF_POS) & 0x1)
#define UPMSR_OVF_MASK (((UINT64_C(1) << UNCORE_NCTRS) - 1) << UPMSR_OVF_POS)

#define UPMPCM_CORE(ID) (UINT64_C(1) << (ID))
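
/*
 * Note (added commentary): the uncore PMI handler (uncore_fiq) decodes UPMSR
 * with these macros.  A condensed sketch of that flow:
 *
 *     uint64_t ovf_mask = (upmsr & UPMSR_OVF_MASK) >> UPMSR_OVF_POS;
 *     for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
 *         if (UPMSR_OVF(upmsr, ctr)) {
 *             // capture and reset the overflowed uncore counter
 *         }
 *     }
 */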

/*
 * The uncore_pmi_mask is a bitmask of CPUs that receive uncore PMIs. It's
 * initialized by uncore_init and controllable by the uncore_pmi_mask boot-arg.
 */
static int32_t uncore_pmi_mask = 0;

/*
 * The uncore_active_ctrs is a bitmask of uncore counters that are currently
 * requested.
 */
static uint16_t uncore_active_ctrs = 0;
static_assert(sizeof(uncore_active_ctrs) * CHAR_BIT >= UNCORE_NCTRS,
    "counter mask should fit the full range of counters");

/*
 * mt_uncore_enabled is true when any uncore counters are active.
 */
bool mt_uncore_enabled = false;

/*
 * The uncore_events are the event configurations for each uncore counter -- as
 * a union to make it easy to program the hardware registers.
 */
static struct uncore_config {
	union uncore_events {
		uint8_t uce_ctrs[UNCORE_NCTRS];
		uint64_t uce_regs[UNCORE_NCTRS / 8];
	} uc_events;
	union uncore_cpu_masks {
		uint16_t uccm_masks[UNCORE_NCTRS];
		uint64_t uccm_regs[UNCORE_NCTRS / 4];
	} uc_cpu_masks[MAX_NMONITORS];
} uncore_config;

static struct uncore_monitor {
	/*
	 * The last snapshot of each of the hardware counter values.
	 */
	uint64_t um_snaps[UNCORE_NCTRS];

	/*
	 * The accumulated counts for each counter.
	 */
	uint64_t um_counts[UNCORE_NCTRS];

	/*
	 * Protects accessing the hardware registers and fields in this structure.
	 */
	lck_spin_t um_lock;

	/*
	 * Whether this monitor needs its registers restored after wake.
	 */
	bool um_sleeping;
} uncore_monitors[MAX_NMONITORS];

/*
 * Each uncore unit has its own monitor, corresponding to the memory hierarchy
 * level it is associated with.
 */
static unsigned int
uncore_nmonitors(void)
{
#if UNCORE_PER_CLUSTER
	return topology_info->num_clusters;
#else /* UNCORE_PER_CLUSTER */
	return 1;
#endif /* !UNCORE_PER_CLUSTER */
}

static unsigned int
uncmon_get_curid(void)
{
#if UNCORE_PER_CLUSTER
	// Pointer arithmetic to translate cluster_id into a clusters[] index.
	return cpu_cluster_id();
#else /* UNCORE_PER_CLUSTER */
	return 0;
#endif /* !UNCORE_PER_CLUSTER */
}

/*
 * Per-monitor locks are required to prevent races with the PMI handlers, not
 * from other CPUs that are configuring (those are serialized with monotonic's
 * per-device lock).
 */

static int
uncmon_lock(struct uncore_monitor *mon)
{
	int intrs_en = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock(&mon->um_lock);
	return intrs_en;
}

static void
uncmon_unlock(struct uncore_monitor *mon, int intrs_en)
{
	lck_spin_unlock(&mon->um_lock);
	(void)ml_set_interrupts_enabled(intrs_en);
}

/*
 * Helper functions for accessing the hardware -- these require the monitor be
 * locked to prevent other CPUs' PMI handlers from making local modifications
 * or updating the counts.
 */

#if UNCORE_VERSION >= 2
#define UPMCR0_INTEN_POS 20
#define UPMCR0_INTGEN_POS 16
#else /* UNCORE_VERSION >= 2 */
#define UPMCR0_INTEN_POS 12
#define UPMCR0_INTGEN_POS 8
#endif /* UNCORE_VERSION < 2 */
enum {
	UPMCR0_INTGEN_OFF = 0,
	/* fast PMIs are only supported on core CPMU */
	UPMCR0_INTGEN_AIC = 2,
	UPMCR0_INTGEN_HALT = 3,
	UPMCR0_INTGEN_FIQ = 4,
};
/* always enable interrupts for all counters */
#define UPMCR0_INTEN (((1ULL << UNCORE_NCTRS) - 1) << UPMCR0_INTEN_POS)
/* route uncore PMIs through the FIQ path */
#define UPMCR0_INIT (UPMCR0_INTEN | (UPMCR0_INTGEN_FIQ << UPMCR0_INTGEN_POS))

/*
 * Turn counting on for counters set in the `enctrmask` and off, otherwise.
 */
static inline void
uncmon_set_counting_locked_l(__unused unsigned int monid, uint64_t enctrmask)
{
	/*
	 * UPMCR0 controls which counters are enabled and how interrupts are
	 * generated on overflow.
	 */
	__builtin_arm_wsr64("UPMCR0_EL1", UPMCR0_INIT | enctrmask);
}

#if UNCORE_PER_CLUSTER

/*
 * Turn counting on for counters set in the `enctrmask` and off, otherwise.
 */
static inline void
uncmon_set_counting_locked_r(unsigned int monid, uint64_t enctrmask)
{
	const uintptr_t upmcr0_offset = 0x4180;
	*(uint64_t *)(cpm_impl[monid] + upmcr0_offset) = UPMCR0_INIT | enctrmask;
}

#endif /* UNCORE_PER_CLUSTER */

/*
 * The uncore performance monitoring counters (UPMCs) are 48-bits wide. The
 * high bit is an overflow bit, triggering a PMI, providing 47 usable bits.
 */

#define UPMC_MAX ((UINT64_C(1) << 48) - 1)

/*
 * The `__builtin_arm_{r,w}sr` functions require constant strings, since the
 * MSR/MRS instructions encode the registers as immediates. Otherwise, this
 * would be indexing into an array of strings.
 */

#define UPMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
    X(6, A); X(7, A)

#if UNCORE_NCTRS <= 8
#define UPMC_ALL(X, A) UPMC_0_7(X, A)
#else /* UNCORE_NCTRS <= 8 */
#define UPMC_8_15(X, A) X(8, A); X(9, A); X(10, A); X(11, A); X(12, A); \
    X(13, A); X(14, A); X(15, A)
#define UPMC_ALL(X, A) UPMC_0_7(X, A); UPMC_8_15(X, A)
#endif /* UNCORE_NCTRS > 8 */

static inline uint64_t
uncmon_read_counter_locked_l(__unused unsigned int monid, unsigned int ctr)
{
	assert(ctr < UNCORE_NCTRS);
	switch (ctr) {
#define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(__MSR_STR(UPMC ## CTR))
		UPMC_ALL(UPMC_RD, 0);
#undef UPMC_RD
	default:
		panic("monotonic: invalid counter read %u", ctr);
		__builtin_unreachable();
	}
}

static inline void
uncmon_write_counter_locked_l(__unused unsigned int monid, unsigned int ctr,
    uint64_t count)
{
	assert(count < UPMC_MAX);
	assert(ctr < UNCORE_NCTRS);
	switch (ctr) {
#define UPMC_WR(CTR, COUNT) case (CTR): \
	return __builtin_arm_wsr64(__MSR_STR(UPMC ## CTR), (COUNT))
		UPMC_ALL(UPMC_WR, count);
#undef UPMC_WR
	default:
		panic("monotonic: invalid counter write %u", ctr);
	}
}

#if UNCORE_PER_CLUSTER

uintptr_t upmc_offs[UNCORE_NCTRS] = {
	[0] = 0x4100, [1] = 0x4248, [2] = 0x4110, [3] = 0x4250, [4] = 0x4120,
	[5] = 0x4258, [6] = 0x4130, [7] = 0x4260, [8] = 0x4140, [9] = 0x4268,
	[10] = 0x4150, [11] = 0x4270, [12] = 0x4160, [13] = 0x4278,
	[14] = 0x4170, [15] = 0x4280,
};

static inline uint64_t
uncmon_read_counter_locked_r(unsigned int mon_id, unsigned int ctr)
{
	assert(mon_id < uncore_nmonitors());
	assert(ctr < UNCORE_NCTRS);
	return *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]);
}

static inline void
uncmon_write_counter_locked_r(unsigned int mon_id, unsigned int ctr,
    uint64_t count)
{
	assert(count < UPMC_MAX);
	assert(ctr < UNCORE_NCTRS);
	assert(mon_id < uncore_nmonitors());
	*(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]) = count;
}

#endif /* UNCORE_PER_CLUSTER */

static void
uncmon_update_locked(unsigned int monid, unsigned int curid, unsigned int ctr)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];
	uint64_t snap = 0;
	if (curid == monid) {
		snap = uncmon_read_counter_locked_l(monid, ctr);
	} else {
#if UNCORE_PER_CLUSTER
		snap = uncmon_read_counter_locked_r(monid, ctr);
#endif /* UNCORE_PER_CLUSTER */
	}
	/* counters should increase monotonically */
	assert(snap >= mon->um_snaps[ctr]);
	mon->um_counts[ctr] += snap - mon->um_snaps[ctr];
	mon->um_snaps[ctr] = snap;
}

static void
uncmon_program_events_locked_l(unsigned int monid)
{
	/*
	 * UPMESR[01] is the event selection register that determines which event a
	 * counter will count.
	 */
	CTRL_REG_SET("UPMESR0_EL1", uncore_config.uc_events.uce_regs[0]);

#if UNCORE_NCTRS > 8
	CTRL_REG_SET("UPMESR1_EL1", uncore_config.uc_events.uce_regs[1]);
#endif /* UNCORE_NCTRS > 8 */

	/*
	 * UPMECM[0123] are the event core masks for each counter -- whether or not
	 * that counter counts events generated by an agent. These are set to all
	 * ones so the uncore counters count events from all cores.
	 *
	 * The bits are based off the start of the cluster -- e.g. even if a core
	 * has a CPU ID of 4, it might be the first CPU in a cluster. Shift the
	 * registers right by the ID of the first CPU in the cluster.
	 */
	CTRL_REG_SET("UPMECM0_EL1",
	    uncore_config.uc_cpu_masks[monid].uccm_regs[0]);
	CTRL_REG_SET("UPMECM1_EL1",
	    uncore_config.uc_cpu_masks[monid].uccm_regs[1]);

#if UNCORE_NCTRS > 8
	CTRL_REG_SET("UPMECM2_EL1",
	    uncore_config.uc_cpu_masks[monid].uccm_regs[2]);
	CTRL_REG_SET("UPMECM3_EL1",
	    uncore_config.uc_cpu_masks[monid].uccm_regs[3]);
#endif /* UNCORE_NCTRS > 8 */
}

#if UNCORE_PER_CLUSTER

static void
uncmon_program_events_locked_r(unsigned int monid)
{
	const uintptr_t upmesr_offs[2] = {[0] = 0x41b0, [1] = 0x41b8, };

	for (unsigned int i = 0; i < sizeof(upmesr_offs) / sizeof(upmesr_offs[0]);
	    i++) {
		*(uint64_t *)(cpm_impl[monid] + upmesr_offs[i]) =
		    uncore_config.uc_events.uce_regs[i];
	}

	const uintptr_t upmecm_offs[4] = {
		[0] = 0x4190, [1] = 0x4198, [2] = 0x41a0, [3] = 0x41a8,
	};

	for (unsigned int i = 0; i < sizeof(upmecm_offs) / sizeof(upmecm_offs[0]);
	    i++) {
		*(uint64_t *)(cpm_impl[monid] + upmecm_offs[i]) =
		    uncore_config.uc_cpu_masks[monid].uccm_regs[i];
	}
}

#endif /* UNCORE_PER_CLUSTER */

static void
uncmon_clear_int_locked_l(__unused unsigned int monid)
{
	__builtin_arm_wsr64("UPMSR_EL1", 0);
}

#if UNCORE_PER_CLUSTER

static void
uncmon_clear_int_locked_r(unsigned int monid)
{
	const uintptr_t upmsr_off = 0x41c0;
	*(uint64_t *)(cpm_impl[monid] + upmsr_off) = 0;
}

#endif /* UNCORE_PER_CLUSTER */

/*
 * Get the PMI mask for the provided `monid` -- that is, the bitmap of CPUs
 * that should be sent PMIs for a particular monitor.
 */
static uint64_t
uncmon_get_pmi_mask(unsigned int monid)
{
	uint64_t pmi_mask = uncore_pmi_mask;

#if UNCORE_PER_CLUSTER
	pmi_mask &= topology_info->clusters[monid].cpu_mask;
#else /* UNCORE_PER_CLUSTER */
#pragma unused(monid)
#endif /* !UNCORE_PER_CLUSTER */

	return pmi_mask;
}

/*
 * Initialization routines for the uncore counters.
 */

static void
uncmon_init_locked_l(unsigned int monid)
{
	/*
	 * UPMPCM defines the PMI core mask for the UPMCs -- which cores should
	 * receive interrupts on overflow.
	 */
	CTRL_REG_SET("UPMPCM_EL1", uncmon_get_pmi_mask(monid));
	uncmon_set_counting_locked_l(monid,
	    mt_uncore_enabled ? uncore_active_ctrs : 0);
}

#if UNCORE_PER_CLUSTER

static uintptr_t acc_impl[MAX_NMONITORS] = {};

static void
uncmon_init_locked_r(unsigned int monid)
{
	const uintptr_t upmpcm_off = 0x1010;
	*(uint64_t *)(acc_impl[monid] + upmpcm_off) = uncmon_get_pmi_mask(monid);
	uncmon_set_counting_locked_r(monid,
	    mt_uncore_enabled ? uncore_active_ctrs : 0);
}

#endif /* UNCORE_PER_CLUSTER */

/*
 * Initialize the uncore device for monotonic.
 */
static int
uncore_init(__unused mt_device_t dev)
{
	assert(MT_NDEVS > 0);
	mt_devices[MT_NDEVS - 1].mtd_nmonitors = (uint8_t)uncore_nmonitors();

#if DEVELOPMENT || DEBUG
	/*
	 * Development and debug kernels observe the `uncore_pmi_mask` boot-arg,
	 * allowing PMIs to be routed to the CPUs present in the supplied bitmap.
	 * Do some sanity checks on the value provided.
	 */
	bool parsed_arg = PE_parse_boot_argn("uncore_pmi_mask", &uncore_pmi_mask,
	    sizeof(uncore_pmi_mask));
	if (parsed_arg) {
#if UNCORE_PER_CLUSTER
		if (__builtin_popcount(uncore_pmi_mask) != (int)uncore_nmonitors()) {
			panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
		}
		for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
			if (__builtin_popcountll(uncmon_get_pmi_mask(i)) != 1) {
				panic("monotonic: invalid uncore PMI CPU for cluster %d in mask 0x%x",
				    i, uncore_pmi_mask);
			}
		}
#else /* UNCORE_PER_CLUSTER */
		if (__builtin_popcount(uncore_pmi_mask) != 1) {
			panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
		}
#endif /* !UNCORE_PER_CLUSTER */
	} else
#endif /* DEVELOPMENT || DEBUG */
	{
#if UNCORE_PER_CLUSTER
		for (unsigned int i = 0; i < topology_info->num_clusters; i++) {
			uncore_pmi_mask |= 1ULL << topology_info->clusters[i].first_cpu_id;
		}
#else /* UNCORE_PER_CLUSTER */
		/* arbitrarily route to core 0 */
		uncore_pmi_mask |= 1;
#endif /* !UNCORE_PER_CLUSTER */
	}
	assert(uncore_pmi_mask != 0);

	unsigned int curmonid = uncmon_get_curid();

	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
#if UNCORE_PER_CLUSTER
		ml_topology_cluster_t *cluster = &topology_info->clusters[monid];
		cpm_impl[monid] = (uintptr_t)cluster->cpm_IMPL_regs;
		acc_impl[monid] = (uintptr_t)cluster->acc_IMPL_regs;
		assert(cpm_impl[monid] != 0 && acc_impl[monid] != 0);
#endif /* UNCORE_PER_CLUSTER */

		struct uncore_monitor *mon = &uncore_monitors[monid];
		lck_spin_init(&mon->um_lock, &mt_lock_grp, LCK_ATTR_NULL);

		int intrs_en = uncmon_lock(mon);
		if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
			uncmon_init_locked_r(monid);
#endif /* UNCORE_PER_CLUSTER */
		} else {
			uncmon_init_locked_l(monid);
		}
		uncmon_unlock(mon, intrs_en);
	}

	mt_uncore_initted = true;

	return 0;
}

/*
 * Support for monotonic's mtd_read function.
 */

static void
uncmon_read_all_counters(unsigned int monid, unsigned int curmonid,
    uint64_t ctr_mask, uint64_t *counts)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];

	int intrs_en = uncmon_lock(mon);

	for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
		if (ctr_mask & (1ULL << ctr)) {
			uncmon_update_locked(monid, curmonid, ctr);
			counts[ctr] = mon->um_counts[ctr];
		}
	}

	uncmon_unlock(mon, intrs_en);
}

/*
 * Read all monitors' counters.
 */
static int
uncore_read(uint64_t ctr_mask, uint64_t *counts_out)
{
	assert(ctr_mask != 0);
	assert(counts_out != NULL);

	if (!uncore_active_ctrs) {
		return EPWROFF;
	}
	if (ctr_mask & ~uncore_active_ctrs) {
		return EINVAL;
	}

	unsigned int curmonid = uncmon_get_curid();
	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		/*
		 * Find this monitor's starting offset into the `counts_out` array.
		 */
		uint64_t *counts = counts_out + (UNCORE_NCTRS * monid);

		uncmon_read_all_counters(monid, curmonid, ctr_mask, counts);
	}

	return 0;
}

/*
 * Support for monotonic's mtd_add function.
 */

/*
 * Add an event to the current uncore configuration. This doesn't take effect
 * until the counters are enabled again, so there's no need to involve the
 * monitors.
 */
static int
uncore_add(struct monotonic_config *config, uint32_t *ctr_out)
{
	if (mt_uncore_enabled) {
		return EBUSY;
	}

	uint32_t available = ~uncore_active_ctrs & config->allowed_ctr_mask;

	if (available == 0) {
		return ENOSPC;
	}

	uint32_t valid_ctrs = (UINT32_C(1) << UNCORE_NCTRS) - 1;
	if ((available & valid_ctrs) == 0) {
		return E2BIG;
	}

	uint32_t ctr = __builtin_ffsll(available) - 1;

	uncore_active_ctrs |= UINT64_C(1) << ctr;
	uncore_config.uc_events.uce_ctrs[ctr] = (uint8_t)config->event;
	uint64_t cpu_mask = UINT64_MAX;
	if (config->cpu_mask != 0) {
		cpu_mask = config->cpu_mask;
	}
	for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
#if UNCORE_PER_CLUSTER
		const unsigned int shift = topology_info->clusters[i].first_cpu_id;
#else /* UNCORE_PER_CLUSTER */
		const unsigned int shift = 0;
#endif /* !UNCORE_PER_CLUSTER */
		uncore_config.uc_cpu_masks[i].uccm_masks[ctr] = (uint16_t)(cpu_mask >> shift);
	}

	*ctr_out = ctr;
	return 0;
}

/*
 * Support for monotonic's mtd_reset function.
 */

/*
 * Reset all configuration and disable the counters if they're currently
 * counting.
 */
static void
uncore_reset(void)
{
	mt_uncore_enabled = false;

	unsigned int curmonid = uncmon_get_curid();

	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		struct uncore_monitor *mon = &uncore_monitors[monid];
		bool remote = monid != curmonid;

		int intrs_en = uncmon_lock(mon);
		if (remote) {
#if UNCORE_PER_CLUSTER
			uncmon_set_counting_locked_r(monid, 0);
#endif /* UNCORE_PER_CLUSTER */
		} else {
			uncmon_set_counting_locked_l(monid, 0);
		}

		for (int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
			if (uncore_active_ctrs & (1U << ctr)) {
				if (remote) {
#if UNCORE_PER_CLUSTER
					uncmon_write_counter_locked_r(monid, ctr, 0);
#endif /* UNCORE_PER_CLUSTER */
				} else {
					uncmon_write_counter_locked_l(monid, ctr, 0);
				}
			}
		}

		memset(&mon->um_snaps, 0, sizeof(mon->um_snaps));
		memset(&mon->um_counts, 0, sizeof(mon->um_counts));

		if (remote) {
#if UNCORE_PER_CLUSTER
			uncmon_clear_int_locked_r(monid);
#endif /* UNCORE_PER_CLUSTER */
		} else {
			uncmon_clear_int_locked_l(monid);
		}

		uncmon_unlock(mon, intrs_en);
	}

	uncore_active_ctrs = 0;
	memset(&uncore_config, 0, sizeof(uncore_config));

	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		struct uncore_monitor *mon = &uncore_monitors[monid];
		bool remote = monid != curmonid;

		int intrs_en = uncmon_lock(mon);
		if (remote) {
#if UNCORE_PER_CLUSTER
			uncmon_program_events_locked_r(monid);
#endif /* UNCORE_PER_CLUSTER */
		} else {
			uncmon_program_events_locked_l(monid);
		}
		uncmon_unlock(mon, intrs_en);
	}
}

/*
 * Support for monotonic's mtd_enable function.
 */

static void
uncmon_set_enabled_l(unsigned int monid, bool enable)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];
	int intrs_en = uncmon_lock(mon);

	if (enable) {
		uncmon_program_events_locked_l(monid);
		uncmon_set_counting_locked_l(monid, uncore_active_ctrs);
	} else {
		uncmon_set_counting_locked_l(monid, 0);
	}

	uncmon_unlock(mon, intrs_en);
}

#if UNCORE_PER_CLUSTER

static void
uncmon_set_enabled_r(unsigned int monid, bool enable)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];
	int intrs_en = uncmon_lock(mon);

	if (enable) {
		uncmon_program_events_locked_r(monid);
		uncmon_set_counting_locked_r(monid, uncore_active_ctrs);
	} else {
		uncmon_set_counting_locked_r(monid, 0);
	}

	uncmon_unlock(mon, intrs_en);
}

#endif /* UNCORE_PER_CLUSTER */

static void
uncore_set_enabled(bool enable)
{
	mt_uncore_enabled = enable;

	unsigned int curmonid = uncmon_get_curid();
	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
			uncmon_set_enabled_r(monid, enable);
#endif /* UNCORE_PER_CLUSTER */
		} else {
			uncmon_set_enabled_l(monid, enable);
		}
	}
}

/*
 * Hooks in the machine layer.
 */

static void
uncore_fiq(uint64_t upmsr)
{
	/*
	 * Determine which counters overflowed.
	 */
	uint64_t disable_ctr_mask = (upmsr & UPMSR_OVF_MASK) >> UPMSR_OVF_POS;
	/* should not receive interrupts from inactive counters */
	assert(!(disable_ctr_mask & ~uncore_active_ctrs));

	unsigned int monid = uncmon_get_curid();
	struct uncore_monitor *mon = &uncore_monitors[monid];

	int intrs_en = uncmon_lock(mon);

	/*
	 * Disable any counters that overflowed.
	 */
	uncmon_set_counting_locked_l(monid,
	    uncore_active_ctrs & ~disable_ctr_mask);

	/*
	 * With the overflowing counters disabled, capture their counts and reset
	 * the UPMCs and their snapshots to 0.
	 */
	for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
		if (UPMSR_OVF(upmsr, ctr)) {
			uncmon_update_locked(monid, monid, ctr);
			mon->um_snaps[ctr] = 0;
			uncmon_write_counter_locked_l(monid, ctr, 0);
		}
	}

	/*
	 * Acknowledge the interrupt, now that any overflowed PMCs have been reset.
	 */
	uncmon_clear_int_locked_l(monid);

	/*
	 * Re-enable all active counters.
	 */
	uncmon_set_counting_locked_l(monid, uncore_active_ctrs);

	uncmon_unlock(mon, intrs_en);
}

static void
uncore_save(void)
{
	if (!uncore_active_ctrs) {
		return;
	}

	unsigned int curmonid = uncmon_get_curid();

	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		struct uncore_monitor *mon = &uncore_monitors[monid];
		int intrs_en = uncmon_lock(mon);

		if (mt_uncore_enabled) {
			if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
				uncmon_set_counting_locked_r(monid, 0);
#endif /* UNCORE_PER_CLUSTER */
			} else {
				uncmon_set_counting_locked_l(monid, 0);
			}
		}

		for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
			if (uncore_active_ctrs & (1U << ctr)) {
				uncmon_update_locked(monid, curmonid, ctr);
			}
		}

		mon->um_sleeping = true;
		uncmon_unlock(mon, intrs_en);
	}
}

static void
uncore_restore(void)
{
	if (!uncore_active_ctrs) {
		return;
	}
	unsigned int curmonid = uncmon_get_curid();

	struct uncore_monitor *mon = &uncore_monitors[curmonid];
	int intrs_en = uncmon_lock(mon);
	if (!mon->um_sleeping) {
		goto out;
	}

	for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
		if (uncore_active_ctrs & (1U << ctr)) {
			uncmon_write_counter_locked_l(curmonid, ctr, mon->um_snaps[ctr]);
		}
	}
	uncmon_program_events_locked_l(curmonid);
	uncmon_init_locked_l(curmonid);
	mon->um_sleeping = false;

out:
	uncmon_unlock(mon, intrs_en);
}

#endif /* HAS_UNCORE_CTRS */

#pragma mark common hooks

void
mt_early_init(void)
{
	topology_info = ml_get_topology_info();
}

void
mt_cpu_idle(cpu_data_t *cpu)
{
	core_idle(cpu);
}

void
mt_cpu_run(cpu_data_t *cpu)
{
	struct mt_cpu *mtc;

	assert(cpu != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	mtc = &cpu->cpu_monotonic;

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		mt_core_set_snap(i, mtc->mtc_snaps[i]);
	}

	/* re-enable the counters */
	core_init_execution_modes();

	core_set_enabled();
}

void
mt_cpu_down(cpu_data_t *cpu)
{
	mt_cpu_idle(cpu);
}

void
mt_cpu_up(cpu_data_t *cpu)
{
	mt_cpu_run(cpu);
}

void
mt_sleep(void)
{
#if HAS_UNCORE_CTRS
	uncore_save();
#endif /* HAS_UNCORE_CTRS */
}

void
mt_wake_per_core(void)
{
#if HAS_UNCORE_CTRS
	if (mt_uncore_initted) {
		uncore_restore();
	}
#endif /* HAS_UNCORE_CTRS */
}

uint64_t
mt_count_pmis(void)
{
	uint64_t npmis = 0;
	for (unsigned int i = 0; i < topology_info->num_cpus; i++) {
		cpu_data_t *cpu =
		    (cpu_data_t *)CpuDataEntries[topology_info->cpus[i].cpu_id].cpu_data_vaddr;
		npmis += cpu->cpu_monotonic.mtc_npmis;
	}
	return npmis;
}

static void
mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
{
	assert(cpu != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	__builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);
	/*
	 * Ensure the CPMU has flushed any increments at this point, so PMSR is up
	 * to date.
	 */
	__builtin_arm_isb(ISB_SY);

	cpu->cpu_monotonic.mtc_npmis += 1;
	cpu->cpu_stat.pmi_cnt_wake += 1;

#if MONOTONIC_DEBUG
	if (!PMCR0_PMI(pmcr0)) {
		kprintf("monotonic: mt_cpu_pmi but no PMI (PMCR0 = %#llx)\n",
		    pmcr0);
	}
#else /* MONOTONIC_DEBUG */
#pragma unused(pmcr0)
#endif /* !MONOTONIC_DEBUG */

	uint64_t pmsr = __builtin_arm_rsr64("PMSR_EL1");

#if MONOTONIC_DEBUG
	printf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx\n",
	    cpu_number(), pmsr, pmcr0);
#endif /* MONOTONIC_DEBUG */

#if MACH_ASSERT
	uint64_t handled = 0;
#endif /* MACH_ASSERT */

	/*
	 * monotonic handles any fixed counter PMIs.
	 */
	for (unsigned int i = 0; i < MT_CORE_NFIXED; i++) {
		if ((pmsr & PMSR_OVF(i)) == 0) {
			continue;
		}

#if MACH_ASSERT
		handled |= 1ULL << i;
#endif /* MACH_ASSERT */
		uint64_t count = mt_cpu_update_count(cpu, i);
		cpu->cpu_monotonic.mtc_counts[i] += count;
		mt_core_set_snap(i, mt_core_reset_values[i]);
		cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];

		if (mt_microstackshots && mt_microstackshot_ctr == i) {
			bool user_mode = false;
			arm_saved_state_t *state = get_user_regs(current_thread());
			if (state) {
				user_mode = PSR64_IS_USER(get_saved_state_cpsr(state));
			}
			KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 1),
			    mt_microstackshot_ctr, user_mode);
			mt_microstackshot_pmi_handler(user_mode, mt_microstackshot_ctx);
		} else if (mt_debug) {
			KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 2),
			    i, count);
		}
	}

	/*
	 * KPC handles the configurable counter PMIs.
	 */
	for (unsigned int i = MT_CORE_NFIXED; i < CORE_NCTRS; i++) {
		if (pmsr & PMSR_OVF(i)) {
#if MACH_ASSERT
			handled |= 1ULL << i;
#endif /* MACH_ASSERT */
			extern void kpc_pmi_handler(unsigned int ctr);
			kpc_pmi_handler(i);
		}
	}

#if MACH_ASSERT
	uint64_t pmsr_after_handling = __builtin_arm_rsr64("PMSR_EL1");
	if (pmsr_after_handling != 0) {
		unsigned int first_ctr_ovf = __builtin_ffsll(pmsr_after_handling) - 1;
		uint64_t count = 0;
		const char *extra = "";
		if (first_ctr_ovf >= CORE_NCTRS) {
			extra = " (invalid counter)";
		} else {
			count = mt_core_snap(first_ctr_ovf);
		}

		panic("monotonic: PMI status not cleared on exit from handler, "
		    "PMSR = 0x%llx HANDLE -> -> 0x%llx, handled 0x%llx, "
		    "PMCR0 = 0x%llx, PMC%d = 0x%llx%s", pmsr, pmsr_after_handling,
		    handled, __builtin_arm_rsr64("PMCR0_EL1"), first_ctr_ovf, count, extra);
	}
#endif /* MACH_ASSERT */

	core_set_enabled();
}

#if CPMU_AIC_PMI
void
mt_cpmu_aic_pmi(cpu_id_t source)
{
	struct cpu_data *curcpu = getCpuDatap();
	if (source != curcpu->interrupt_nub) {
		panic("monotonic: PMI from IOCPU %p delivered to %p", source,
		    curcpu->interrupt_nub);
	}
	mt_cpu_pmi(curcpu, __builtin_arm_rsr64("PMCR0_EL1"));
}
#endif /* CPMU_AIC_PMI */

void
mt_fiq(void *cpu, uint64_t pmcr0, uint64_t upmsr)
{
#if CPMU_AIC_PMI
#pragma unused(cpu, pmcr0)
#else /* CPMU_AIC_PMI */
	mt_cpu_pmi(cpu, pmcr0);
#endif /* !CPMU_AIC_PMI */

#if HAS_UNCORE_CTRS
	uncore_fiq(upmsr);
#else /* HAS_UNCORE_CTRS */
#pragma unused(upmsr)
#endif /* !HAS_UNCORE_CTRS */
}

static uint32_t mt_xc_sync;

static void
mt_microstackshot_start_remote(__unused void *arg)
{
	cpu_data_t *cpu = getCpuDatap();

	__builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		uint64_t count = mt_cpu_update_count(cpu, i);
		cpu->cpu_monotonic.mtc_counts[i] += count;
		mt_core_set_snap(i, mt_core_reset_values[i]);
		cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];
	}

	core_set_enabled();

	if (os_atomic_dec(&mt_xc_sync, relaxed) == 0) {
		thread_wakeup((event_t)&mt_xc_sync);
	}
}

int
mt_microstackshot_start_arch(uint64_t period)
{
	uint64_t reset_value = 0;
	int ovf = os_sub_overflow(CTR_MAX, period, &reset_value);
	if (ovf) {
		return ERANGE;
	}

	mt_core_reset_values[mt_microstackshot_ctr] = reset_value;
	cpu_broadcast_xcall(&mt_xc_sync, TRUE, mt_microstackshot_start_remote,
	    mt_microstackshot_start_remote /* cannot pass NULL */);

	return 0;
}
1368 struct mt_device mt_devices
[] = {
1371 .mtd_init
= core_init
,
1375 .mtd_name
= "uncore",
1376 .mtd_init
= uncore_init
,
1377 .mtd_add
= uncore_add
,
1378 .mtd_reset
= uncore_reset
,
1379 .mtd_enable
= uncore_set_enabled
,
1380 .mtd_read
= uncore_read
,
1382 .mtd_ncounters
= UNCORE_NCTRS
,
1384 #endif /* HAS_UNCORE_CTRS */
1388 (sizeof(mt_devices
) / sizeof(mt_devices
[0])) == MT_NDEVS
,
1389 "MT_NDEVS macro should be same as the length of mt_devices");