/*
 * Copyright (c) 2017-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/cpu_data_internal.h>
#include <arm/machine_routines.h>
#include <arm64/monotonic.h>
#include <kern/assert.h>
#include <kern/debug.h> /* panic */
#include <kern/kpc.h>
#include <kern/monotonic.h>
#include <machine/atomic.h>
#include <machine/limits.h> /* CHAR_BIT */
#include <os/overflow.h>
#include <pexpert/arm64/board_config.h>
#include <pexpert/device_tree.h> /* SecureDTFindEntry */
#include <pexpert/pexpert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <string.h>
#include <sys/errno.h>
#include <sys/monotonic.h>

/*
 * Ensure that control registers read back what was written under MACH_ASSERT
 * kernels.
 *
 * A static inline function cannot be used due to passing the register through
 * the builtin -- it requires a constant string as its first argument, since
 * MSR registers are encoded as an immediate in the instruction.
 */
#if MACH_ASSERT
#define CTRL_REG_SET(reg, val) do { \
    __builtin_arm_wsr64((reg), (val)); \
    uint64_t __check_reg = __builtin_arm_rsr64((reg)); \
    if (__check_reg != (val)) { \
        panic("value written to %s was not read back (wrote %llx, read %llx)", \
            #reg, (val), __check_reg); \
    } \
} while (0)
#else /* MACH_ASSERT */
#define CTRL_REG_SET(reg, val) __builtin_arm_wsr64((reg), (val))
#endif /* MACH_ASSERT */

#pragma mark core counters

bool mt_core_supported = true;

static const ml_topology_info_t *topology_info;

/*
 * PMC[0-1] are the 48-bit fixed counters -- PMC0 is cycles and PMC1 is
 * instructions (see arm64/monotonic.h).
 *
 * PMC2+ are currently handled by kpc.
 */
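/*
 * These X-macros expand an operation per counter because the MSR name passed
 * to the ARM builtins must be a compile-time constant string -- see the note
 * above the UPMC accessors below for the same constraint.
 */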
#define PMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
    X(6, A); X(7, A)

#if CORE_NCTRS > 8
#define PMC_8_9(X, A) X(8, A); X(9, A)
#else // CORE_NCTRS > 8
#define PMC_8_9(X, A)
#endif // CORE_NCTRS > 8

#define PMC_ALL(X, A) PMC_0_7(X, A); PMC_8_9(X, A)

#define CTR_MAX ((UINT64_C(1) << 47) - 1)

#define CYCLES 0
#define INSTRS 1

/*
 * PMC0's offset into a core's PIO range.
 *
 * This allows cores to remotely query another core's counters.
 */

#define PIO_PMC0_OFFSET (0x200)

/*
 * The offset of the counter in the configuration registers.  Post-Hurricane
 * devices have additional counters that need a larger shift than the original
 * counters.
 *
 * XXX For now, just support the lower-numbered counters.
 */
#define CTR_POS(CTR) (CTR)

/*
 * PMCR0 is the main control register for the performance monitor.  It
 * controls whether the counters are enabled, how they deliver interrupts, and
 * other features.
 */

#define PMCR0_CTR_EN(CTR) (UINT64_C(1) << CTR_POS(CTR))
#define PMCR0_FIXED_EN (PMCR0_CTR_EN(CYCLES) | PMCR0_CTR_EN(INSTRS))
/* how interrupts are delivered on a PMI */
enum {
    PMCR0_INTGEN_OFF = 0,
    PMCR0_INTGEN_PMI = 1,
    PMCR0_INTGEN_AIC = 2,
    PMCR0_INTGEN_HALT = 3,
    PMCR0_INTGEN_FIQ = 4,
};
#define PMCR0_INTGEN_SET(X) ((uint64_t)(X) << 8)

#if CPMU_AIC_PMI
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_AIC)
#else /* CPMU_AIC_PMI */
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ)
#endif /* !CPMU_AIC_PMI */

#define PMCR0_PMI_SHIFT (12)
#define PMCR0_CTR_GE8_PMI_SHIFT (44)
#define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (PMCR0_PMI_SHIFT + CTR_POS(CTR)))
/* fixed counters are always counting */
#define PMCR0_PMI_INIT (PMCR0_PMI_EN(CYCLES) | PMCR0_PMI_EN(INSTRS))
/* disable counting on a PMI */
#define PMCR0_DISCNT_EN (UINT64_C(1) << 20)
/* block PMIs until ERET retires */
#define PMCR0_WFRFE_EN (UINT64_C(1) << 22)
/* count global (not just core-local) L2C events */
#define PMCR0_L2CGLOBAL_EN (UINT64_C(1) << 23)
/* user mode access to configuration registers */
#define PMCR0_USEREN_EN (UINT64_C(1) << 30)
#define PMCR0_CTR_GE8_EN_SHIFT (32)

#define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT)
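/*
 * Note that PMCR0_INIT leaves the counter-enable bits clear: writing
 * PMCR0_INIT by itself stops the counters (see core_idle and mt_cpu_pmi),
 * while core_set_enabled() ORs in PMCR0_FIXED_EN and any kpc-managed bits.
 */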

/*
 * PMCR1 controls which execution modes count events.
 */
#define PMCR1_EL0A32_EN(CTR) (UINT64_C(1) << (0 + CTR_POS(CTR)))
#define PMCR1_EL0A64_EN(CTR) (UINT64_C(1) << (8 + CTR_POS(CTR)))
#define PMCR1_EL1A64_EN(CTR) (UINT64_C(1) << (16 + CTR_POS(CTR)))
/* PMCR1_EL3A64 is not supported on systems with no monitor */
#if defined(APPLEHURRICANE)
#define PMCR1_EL3A64_EN(CTR) UINT64_C(0)
#else
#define PMCR1_EL3A64_EN(CTR) (UINT64_C(1) << (24 + CTR_POS(CTR)))
#endif
#define PMCR1_ALL_EN(CTR) (PMCR1_EL0A32_EN(CTR) | PMCR1_EL0A64_EN(CTR) | \
    PMCR1_EL1A64_EN(CTR) | PMCR1_EL3A64_EN(CTR))

/* fixed counters always count in all modes */
#define PMCR1_INIT (PMCR1_ALL_EN(CYCLES) | PMCR1_ALL_EN(INSTRS))

static inline void
core_init_execution_modes(void)
{
    uint64_t pmcr1;

    pmcr1 = __builtin_arm_rsr64("PMCR1_EL1");
    pmcr1 |= PMCR1_INIT;
    __builtin_arm_wsr64("PMCR1_EL1", pmcr1);
}

#define PMSR_OVF(CTR) (1ULL << (CTR))

static int
core_init(__unused mt_device_t dev)
{
    /* the dev node interface to the core counters is still unsupported */
    return ENOTSUP;
}

struct mt_cpu *
mt_cur_cpu(void)
{
    return &getCpuDatap()->cpu_monotonic;
}

uint64_t
mt_core_snap(unsigned int ctr)
{
    switch (ctr) {
#define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(__MSR_STR(PMC ## CTR))
        PMC_ALL(PMC_RD, 0);
#undef PMC_RD
    default:
        panic("monotonic: invalid core counter read: %u", ctr);
        __builtin_unreachable();
    }
}

void
mt_core_set_snap(unsigned int ctr, uint64_t count)
{
    switch (ctr) {
    case 0:
        __builtin_arm_wsr64("PMC0", count);
        break;
    case 1:
        __builtin_arm_wsr64("PMC1", count);
        break;
    default:
        panic("monotonic: invalid core counter %u write %llu", ctr, count);
        __builtin_unreachable();
    }
}

static void
core_set_enabled(void)
{
    uint64_t pmcr0 = __builtin_arm_rsr64("PMCR0_EL1");
    pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN;

    if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
        uint64_t kpc_ctrs = kpc_get_configurable_pmc_mask(
            KPC_CLASS_CONFIGURABLE_MASK) << MT_CORE_NFIXED;
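        /*
         * Counters 8 and up have their enable and PMI controls in separate
         * PMCR0 fields (PMCR0_CTR_GE8_EN_SHIFT and PMCR0_CTR_GE8_PMI_SHIFT),
         * so shift the upper bits of the kpc mask into those fields and keep
         * only the low bits for the regular enable and PMI fields below.
         */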
#if KPC_ARM64_CONFIGURABLE_COUNT > 6
        uint64_t ctrs_ge8 = kpc_ctrs >> 8;
        pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_EN_SHIFT;
        pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_PMI_SHIFT;
        kpc_ctrs &= (1ULL << 8) - 1;
#endif /* KPC_ARM64_CONFIGURABLE_COUNT > 6 */
        kpc_ctrs |= kpc_ctrs << PMCR0_PMI_SHIFT;
        pmcr0 |= kpc_ctrs;
    }

    __builtin_arm_wsr64("PMCR0_EL1", pmcr0);
#if MACH_ASSERT
    /*
     * Only check for the values that were ORed in.
     */
    uint64_t pmcr0_check = __builtin_arm_rsr64("PMCR0_EL1");
    if ((pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN)) != (PMCR0_INIT | PMCR0_FIXED_EN)) {
        panic("monotonic: hardware ignored enable (read %llx, wrote %llx)",
            pmcr0_check, pmcr0);
    }
#endif /* MACH_ASSERT */
}

static void
core_idle(__unused cpu_data_t *cpu)
{
    assert(cpu != NULL);
    assert(ml_get_interrupts_enabled() == FALSE);

#if DEBUG
    uint64_t pmcr0 = __builtin_arm_rsr64("PMCR0_EL1");
    if ((pmcr0 & PMCR0_FIXED_EN) == 0) {
        panic("monotonic: counters disabled before idling, pmcr0 = 0x%llx\n", pmcr0);
    }
    uint64_t pmcr1 = __builtin_arm_rsr64("PMCR1_EL1");
    if ((pmcr1 & PMCR1_INIT) == 0) {
        panic("monotonic: counter modes disabled before idling, pmcr1 = 0x%llx\n", pmcr1);
    }
#endif /* DEBUG */

    /* disable counters before updating */
    __builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);

    mt_update_fixed_counts();
}

#pragma mark uncore performance monitor

#if HAS_UNCORE_CTRS

static bool mt_uncore_initted = false;

/*
 * Uncore Performance Monitor
 *
 * Uncore performance monitors provide event-counting for the last-level caches
 * (LLCs).  Each LLC has its own uncore performance monitor, which can only be
 * accessed by cores that use that LLC.  Like the core performance monitoring
 * unit, uncore counters are configured globally.  If there is more than one
 * LLC on the system, PIO reads must be used to satisfy uncore requests (using
 * the `_r` remote variants of the access functions).  Otherwise, local MSRs
 * suffice (using the `_l` local variants of the access functions).
 */

#if UNCORE_PER_CLUSTER
#define MAX_NMONITORS MAX_CPU_CLUSTERS
static uintptr_t cpm_impl[MAX_NMONITORS] = {};
#else
#define MAX_NMONITORS (1)
#endif /* UNCORE_PER_CLUSTER */

#if UNCORE_VERSION >= 2
/*
 * V2 uncore monitors feature a CTI mechanism -- the second bit of UPMSR is
 * used to track if a CTI has been triggered due to an overflow.
 */
#define UPMSR_OVF_POS 2
#else /* UNCORE_VERSION >= 2 */
#define UPMSR_OVF_POS 1
#endif /* UNCORE_VERSION < 2 */
#define UPMSR_OVF(R, CTR) ((R) >> ((CTR) + UPMSR_OVF_POS) & 0x1)
#define UPMSR_OVF_MASK (((UINT64_C(1) << UNCORE_NCTRS) - 1) << UPMSR_OVF_POS)

#define UPMPCM_CORE(ID) (UINT64_C(1) << (ID))

/*
 * The uncore_pmi_mask is a bitmask of CPUs that receive uncore PMIs.  It's
 * initialized by uncore_init and controllable by the uncore_pmi_mask boot-arg.
 */
static int32_t uncore_pmi_mask = 0;

/*
 * The uncore_active_ctrs is a bitmask of uncore counters that are currently
 * requested.
 */
static uint16_t uncore_active_ctrs = 0;
static_assert(sizeof(uncore_active_ctrs) * CHAR_BIT >= UNCORE_NCTRS,
    "counter mask should fit the full range of counters");

/*
 * mt_uncore_enabled is true when any uncore counters are active.
 */
bool mt_uncore_enabled = false;

/*
 * The uncore_events are the event configurations for each uncore counter -- as
 * a union to make it easy to program the hardware registers.
 */
static struct uncore_config {
    union {
        uint8_t uce_ctrs[UNCORE_NCTRS];
        uint64_t uce_regs[UNCORE_NCTRS / 8];
    } uc_events;
    union {
        uint16_t uccm_masks[UNCORE_NCTRS];
        uint64_t uccm_regs[UNCORE_NCTRS / 4];
    } uc_cpu_masks[MAX_NMONITORS];
} uncore_config;

static struct uncore_monitor {
    /*
     * The last snapshot of each of the hardware counter values.
     */
    uint64_t um_snaps[UNCORE_NCTRS];

    /*
     * The accumulated counts for each counter.
     */
    uint64_t um_counts[UNCORE_NCTRS];

    /*
     * Protects accessing the hardware registers and fields in this structure.
     */
    lck_spin_t um_lock;

    /*
     * Whether this monitor needs its registers restored after wake.
     */
    bool um_sleeping;
} uncore_monitors[MAX_NMONITORS];

/*
 * Each uncore unit has its own monitor, corresponding to the memory hierarchy
 * of the LLCs.
 */
static unsigned int
uncore_nmonitors(void)
{
#if UNCORE_PER_CLUSTER
    return topology_info->num_clusters;
#else /* UNCORE_PER_CLUSTER */
    return 1;
#endif /* !UNCORE_PER_CLUSTER */
}

static unsigned int
uncmon_get_curid(void)
{
#if UNCORE_PER_CLUSTER
    // Pointer arithmetic to translate cluster_id into a clusters[] index.
    return cpu_cluster_id();
#else /* UNCORE_PER_CLUSTER */
    return 0;
#endif /* !UNCORE_PER_CLUSTER */
}

/*
 * Per-monitor locks are required to prevent races with the PMI handlers, not
 * from other CPUs that are configuring (those are serialized with monotonic's
 * per-device lock).
 */

static int
uncmon_lock(struct uncore_monitor *mon)
{
    int intrs_en = ml_set_interrupts_enabled(FALSE);
    lck_spin_lock(&mon->um_lock);
    return intrs_en;
}

static void
uncmon_unlock(struct uncore_monitor *mon, int intrs_en)
{
    lck_spin_unlock(&mon->um_lock);
    (void)ml_set_interrupts_enabled(intrs_en);
}

/*
 * Helper functions for accessing the hardware -- these require the monitor be
 * locked to prevent other CPUs' PMI handlers from making local modifications
 * or updating the counts.
 */

#if UNCORE_VERSION >= 2
#define UPMCR0_INTEN_POS 20
#define UPMCR0_INTGEN_POS 16
#else /* UNCORE_VERSION >= 2 */
#define UPMCR0_INTEN_POS 12
#define UPMCR0_INTGEN_POS 8
#endif /* UNCORE_VERSION < 2 */
enum {
    UPMCR0_INTGEN_OFF = 0,
    /* fast PMIs are only supported on core CPMU */
    UPMCR0_INTGEN_AIC = 2,
    UPMCR0_INTGEN_HALT = 3,
    UPMCR0_INTGEN_FIQ = 4,
};
/* always enable interrupts for all counters */
#define UPMCR0_INTEN (((1ULL << UNCORE_NCTRS) - 1) << UPMCR0_INTEN_POS)
/* route uncore PMIs through the FIQ path */
#define UPMCR0_INIT (UPMCR0_INTEN | (UPMCR0_INTGEN_FIQ << UPMCR0_INTGEN_POS))

/*
 * Turn counting on for counters set in the `enctrmask` and off, otherwise.
 */
static inline void
uncmon_set_counting_locked_l(__unused unsigned int monid, uint64_t enctrmask)
{
    /*
     * UPMCR0 controls which counters are enabled and how interrupts are
     * generated for overflows.
     */
    __builtin_arm_wsr64("UPMCR0_EL1", UPMCR0_INIT | enctrmask);
}

#if UNCORE_PER_CLUSTER

/*
 * Turn counting on for counters set in the `enctrmask` and off, otherwise.
 */
static inline void
uncmon_set_counting_locked_r(unsigned int monid, uint64_t enctrmask)
{
    const uintptr_t upmcr0_offset = 0x4180;
    *(uint64_t *)(cpm_impl[monid] + upmcr0_offset) = UPMCR0_INIT | enctrmask;
}

#endif /* UNCORE_PER_CLUSTER */

/*
 * The uncore performance monitoring counters (UPMCs) are 48-bits wide.  The
 * high bit is an overflow bit, triggering a PMI, providing 47 usable bits.
 */

#define UPMC_MAX ((UINT64_C(1) << 48) - 1)

/*
 * The `__builtin_arm_{r,w}sr` functions require constant strings, since the
 * MSR/MRS instructions encode the registers as immediates.  Otherwise, this
 * would be indexing into an array of strings.
 */

#define UPMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
    X(6, A); X(7, A)
#if UNCORE_NCTRS <= 8
#define UPMC_ALL(X, A) UPMC_0_7(X, A)
#else /* UNCORE_NCTRS <= 8 */
#define UPMC_8_15(X, A) X(8, A); X(9, A); X(10, A); X(11, A); X(12, A); \
    X(13, A); X(14, A); X(15, A)
#define UPMC_ALL(X, A) UPMC_0_7(X, A); UPMC_8_15(X, A)
#endif /* UNCORE_NCTRS > 8 */

static inline uint64_t
uncmon_read_counter_locked_l(__unused unsigned int monid, unsigned int ctr)
{
    assert(ctr < UNCORE_NCTRS);
    switch (ctr) {
#define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(__MSR_STR(UPMC ## CTR))
        UPMC_ALL(UPMC_RD, 0);
#undef UPMC_RD
    default:
        panic("monotonic: invalid counter read %u", ctr);
        __builtin_unreachable();
    }
}

static inline void
uncmon_write_counter_locked_l(__unused unsigned int monid, unsigned int ctr,
    uint64_t count)
{
    assert(count < UPMC_MAX);
    assert(ctr < UNCORE_NCTRS);
    switch (ctr) {
#define UPMC_WR(CTR, COUNT) case (CTR): \
    return __builtin_arm_wsr64(__MSR_STR(UPMC ## CTR), (COUNT))
        UPMC_ALL(UPMC_WR, count);
#undef UPMC_WR
    default:
        panic("monotonic: invalid counter write %u", ctr);
    }
}

#if UNCORE_PER_CLUSTER

uintptr_t upmc_offs[UNCORE_NCTRS] = {
    [0] = 0x4100, [1] = 0x4248, [2] = 0x4110, [3] = 0x4250, [4] = 0x4120,
    [5] = 0x4258, [6] = 0x4130, [7] = 0x4260, [8] = 0x4140, [9] = 0x4268,
    [10] = 0x4150, [11] = 0x4270, [12] = 0x4160, [13] = 0x4278,
    [14] = 0x4170, [15] = 0x4280,
};

static inline uint64_t
uncmon_read_counter_locked_r(unsigned int mon_id, unsigned int ctr)
{
    assert(mon_id < uncore_nmonitors());
    assert(ctr < UNCORE_NCTRS);
    return *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]);
}

static inline void
uncmon_write_counter_locked_r(unsigned int mon_id, unsigned int ctr,
    uint64_t count)
{
    assert(count < UPMC_MAX);
    assert(ctr < UNCORE_NCTRS);
    assert(mon_id < uncore_nmonitors());
    *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]) = count;
}

#endif /* UNCORE_PER_CLUSTER */

static inline void
uncmon_update_locked(unsigned int monid, unsigned int curid, unsigned int ctr)
{
    struct uncore_monitor *mon = &uncore_monitors[monid];
    uint64_t snap = 0;
    if (curid == monid) {
        snap = uncmon_read_counter_locked_l(monid, ctr);
    } else {
#if UNCORE_PER_CLUSTER
        snap = uncmon_read_counter_locked_r(monid, ctr);
#endif /* UNCORE_PER_CLUSTER */
    }
    /* counters should increase monotonically */
    assert(snap >= mon->um_snaps[ctr]);
    mon->um_counts[ctr] += snap - mon->um_snaps[ctr];
    mon->um_snaps[ctr] = snap;
}

static inline void
uncmon_program_events_locked_l(unsigned int monid)
{
    /*
     * UPMESR[01] is the event selection register that determines which event a
     * counter will count.
     */
    CTRL_REG_SET("UPMESR0_EL1", uncore_config.uc_events.uce_regs[0]);

#if UNCORE_NCTRS > 8
    CTRL_REG_SET("UPMESR1_EL1", uncore_config.uc_events.uce_regs[1]);
#endif /* UNCORE_NCTRS > 8 */

    /*
     * UPMECM[0123] are the event core masks for each counter -- whether or not
     * that counter counts events generated by an agent.  These are set to all
     * ones so the uncore counters count events from all cores.
     *
     * The bits are based off the start of the cluster -- e.g. even if a core
     * has a CPU ID of 4, it might be the first CPU in a cluster.  Shift the
     * registers right by the ID of the first CPU in the cluster.
     */
    CTRL_REG_SET("UPMECM0_EL1",
        uncore_config.uc_cpu_masks[monid].uccm_regs[0]);
    CTRL_REG_SET("UPMECM1_EL1",
        uncore_config.uc_cpu_masks[monid].uccm_regs[1]);

#if UNCORE_NCTRS > 8
    CTRL_REG_SET("UPMECM2_EL1",
        uncore_config.uc_cpu_masks[monid].uccm_regs[2]);
    CTRL_REG_SET("UPMECM3_EL1",
        uncore_config.uc_cpu_masks[monid].uccm_regs[3]);
#endif /* UNCORE_NCTRS > 8 */
}

#if UNCORE_PER_CLUSTER

static inline void
uncmon_program_events_locked_r(unsigned int monid)
{
    const uintptr_t upmesr_offs[2] = {[0] = 0x41b0, [1] = 0x41b8, };

    for (unsigned int i = 0; i < sizeof(upmesr_offs) / sizeof(upmesr_offs[0]);
        i++) {
        *(uint64_t *)(cpm_impl[monid] + upmesr_offs[i]) =
            uncore_config.uc_events.uce_regs[i];
    }

    const uintptr_t upmecm_offs[4] = {
        [0] = 0x4190, [1] = 0x4198, [2] = 0x41a0, [3] = 0x41a8,
    };

    for (unsigned int i = 0; i < sizeof(upmecm_offs) / sizeof(upmecm_offs[0]);
        i++) {
        *(uint64_t *)(cpm_impl[monid] + upmecm_offs[i]) =
            uncore_config.uc_cpu_masks[monid].uccm_regs[i];
    }
}

#endif /* UNCORE_PER_CLUSTER */

static void
uncmon_clear_int_locked_l(__unused unsigned int monid)
{
    __builtin_arm_wsr64("UPMSR_EL1", 0);
}

#if UNCORE_PER_CLUSTER

static void
uncmon_clear_int_locked_r(unsigned int monid)
{
    const uintptr_t upmsr_off = 0x41c0;
    *(uint64_t *)(cpm_impl[monid] + upmsr_off) = 0;
}

#endif /* UNCORE_PER_CLUSTER */

/*
 * Get the PMI mask for the provided `monid` -- that is, the bitmap of CPUs
 * that should be sent PMIs for a particular monitor.
 */
static uint64_t
uncmon_get_pmi_mask(unsigned int monid)
{
    uint64_t pmi_mask = uncore_pmi_mask;

#if UNCORE_PER_CLUSTER
    pmi_mask &= topology_info->clusters[monid].cpu_mask;
#else /* UNCORE_PER_CLUSTER */
#pragma unused(monid)
#endif /* !UNCORE_PER_CLUSTER */

    return pmi_mask;
}

/*
 * Initialization routines for the uncore counters.
 */

static void
uncmon_init_locked_l(unsigned int monid)
{
    /*
     * UPMPCM defines the PMI core mask for the UPMCs -- which cores should
     * receive interrupts on overflow.
     */
    CTRL_REG_SET("UPMPCM_EL1", uncmon_get_pmi_mask(monid));
    uncmon_set_counting_locked_l(monid,
        mt_uncore_enabled ? uncore_active_ctrs : 0);
}

#if UNCORE_PER_CLUSTER

static uintptr_t acc_impl[MAX_NMONITORS] = {};

static void
uncmon_init_locked_r(unsigned int monid)
{
    const uintptr_t upmpcm_off = 0x1010;

    *(uint64_t *)(acc_impl[monid] + upmpcm_off) = uncmon_get_pmi_mask(monid);
    uncmon_set_counting_locked_r(monid,
        mt_uncore_enabled ? uncore_active_ctrs : 0);
}

#endif /* UNCORE_PER_CLUSTER */

/*
 * Initialize the uncore device for monotonic.
 */
static int
uncore_init(__unused mt_device_t dev)
{
#if HAS_UNCORE_CTRS
    assert(MT_NDEVS > 0);
    mt_devices[MT_NDEVS - 1].mtd_nmonitors = (uint8_t)uncore_nmonitors();
#endif

#if DEVELOPMENT || DEBUG
    /*
     * Development and debug kernels observe the `uncore_pmi_mask` boot-arg,
     * allowing PMIs to be routed to the CPUs present in the supplied bitmap.
     * Do some sanity checks on the value provided.
     */
    bool parsed_arg = PE_parse_boot_argn("uncore_pmi_mask", &uncore_pmi_mask,
        sizeof(uncore_pmi_mask));
    if (parsed_arg) {
#if UNCORE_PER_CLUSTER
        if (__builtin_popcount(uncore_pmi_mask) != (int)uncore_nmonitors()) {
            panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
        }
        for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
            if (__builtin_popcountll(uncmon_get_pmi_mask(i)) != 1) {
                panic("monotonic: invalid uncore PMI CPU for cluster %d in mask 0x%x",
                    i, uncore_pmi_mask);
            }
        }
#else /* UNCORE_PER_CLUSTER */
        if (__builtin_popcount(uncore_pmi_mask) != 1) {
            panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
        }
#endif /* !UNCORE_PER_CLUSTER */
    } else
#endif /* DEVELOPMENT || DEBUG */
    {
#if UNCORE_PER_CLUSTER
        for (unsigned int i = 0; i < topology_info->num_clusters; i++) {
            uncore_pmi_mask |= 1ULL << topology_info->clusters[i].first_cpu_id;
        }
#else /* UNCORE_PER_CLUSTER */
        /* arbitrarily route to core 0 */
        uncore_pmi_mask |= 1;
#endif /* !UNCORE_PER_CLUSTER */
    }
    assert(uncore_pmi_mask != 0);

    unsigned int curmonid = uncmon_get_curid();

    for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
#if UNCORE_PER_CLUSTER
        ml_topology_cluster_t *cluster = &topology_info->clusters[monid];
        cpm_impl[monid] = (uintptr_t)cluster->cpm_IMPL_regs;
        acc_impl[monid] = (uintptr_t)cluster->acc_IMPL_regs;
        assert(cpm_impl[monid] != 0 && acc_impl[monid] != 0);
#endif /* UNCORE_PER_CLUSTER */

        struct uncore_monitor *mon = &uncore_monitors[monid];
        lck_spin_init(&mon->um_lock, &mt_lock_grp, LCK_ATTR_NULL);

        int intrs_en = uncmon_lock(mon);
        if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
            uncmon_init_locked_r(monid);
#endif /* UNCORE_PER_CLUSTER */
        } else {
            uncmon_init_locked_l(monid);
        }
        uncmon_unlock(mon, intrs_en);
    }

    mt_uncore_initted = true;

    return 0;
}

/*
 * Support for monotonic's mtd_read function.
 */

static void
uncmon_read_all_counters(unsigned int monid, unsigned int curmonid,
    uint64_t ctr_mask, uint64_t *counts)
{
    struct uncore_monitor *mon = &uncore_monitors[monid];

    int intrs_en = uncmon_lock(mon);

    for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
        if (ctr_mask & (1ULL << ctr)) {
            uncmon_update_locked(monid, curmonid, ctr);
            counts[ctr] = mon->um_counts[ctr];
        }
    }

    uncmon_unlock(mon, intrs_en);
}

/*
 * Read all monitors' counters.
 */
static int
uncore_read(uint64_t ctr_mask, uint64_t *counts_out)
{
    assert(ctr_mask != 0);
    assert(counts_out != NULL);

    if (!uncore_active_ctrs) {
        return EPWROFF;
    }
    if (ctr_mask & ~uncore_active_ctrs) {
        return EINVAL;
    }

    unsigned int curmonid = uncmon_get_curid();
    for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
        /*
         * Find this monitor's starting offset into the `counts_out` array.
         */
        uint64_t *counts = counts_out + (UNCORE_NCTRS * monid);

        uncmon_read_all_counters(monid, curmonid, ctr_mask, counts);
    }

    return 0;
}

/*
 * Support for monotonic's mtd_add function.
 */

/*
 * Add an event to the current uncore configuration.  This doesn't take effect
 * until the counters are enabled again, so there's no need to involve the
 * monitors.
 */
static int
uncore_add(struct monotonic_config *config, uint32_t *ctr_out)
{
    if (mt_uncore_enabled) {
        return EBUSY;
    }

    uint32_t available = ~uncore_active_ctrs & config->allowed_ctr_mask;

    if (available == 0) {
        return ENOSPC;
    }

    uint32_t valid_ctrs = (UINT32_C(1) << UNCORE_NCTRS) - 1;
    if ((available & valid_ctrs) == 0) {
        return E2BIG;
    }

    uint32_t ctr = __builtin_ffsll(available) - 1;

    uncore_active_ctrs |= UINT64_C(1) << ctr;
    uncore_config.uc_events.uce_ctrs[ctr] = (uint8_t)config->event;
    uint64_t cpu_mask = UINT64_MAX;
    if (config->cpu_mask != 0) {
        cpu_mask = config->cpu_mask;
    }
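    /*
     * Each monitor stores its CPU mask relative to the first CPU of its
     * cluster (see the UPMECM note in uncmon_program_events_locked_l), so
     * shift the global mask down before truncating it to 16 bits.
     */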
    for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
#if UNCORE_PER_CLUSTER
        const unsigned int shift = topology_info->clusters[i].first_cpu_id;
#else /* UNCORE_PER_CLUSTER */
        const unsigned int shift = 0;
#endif /* !UNCORE_PER_CLUSTER */
        uncore_config.uc_cpu_masks[i].uccm_masks[ctr] = (uint16_t)(cpu_mask >> shift);
    }

    *ctr_out = ctr;
    return 0;
}

/*
 * Support for monotonic's mtd_reset function.
 */

/*
 * Reset all configuration and disable the counters if they're currently
 * counting.
 */
static void
uncore_reset(void)
{
    mt_uncore_enabled = false;

    unsigned int curmonid = uncmon_get_curid();

    for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
        struct uncore_monitor *mon = &uncore_monitors[monid];
        bool remote = monid != curmonid;

        int intrs_en = uncmon_lock(mon);
        if (remote) {
#if UNCORE_PER_CLUSTER
            uncmon_set_counting_locked_r(monid, 0);
#endif /* UNCORE_PER_CLUSTER */
        } else {
            uncmon_set_counting_locked_l(monid, 0);
        }

        for (int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
            if (uncore_active_ctrs & (1U << ctr)) {
                if (remote) {
#if UNCORE_PER_CLUSTER
                    uncmon_write_counter_locked_r(monid, ctr, 0);
#endif /* UNCORE_PER_CLUSTER */
                } else {
                    uncmon_write_counter_locked_l(monid, ctr, 0);
                }
            }
        }

        memset(&mon->um_snaps, 0, sizeof(mon->um_snaps));
        memset(&mon->um_counts, 0, sizeof(mon->um_counts));
        if (remote) {
#if UNCORE_PER_CLUSTER
            uncmon_clear_int_locked_r(monid);
#endif /* UNCORE_PER_CLUSTER */
        } else {
            uncmon_clear_int_locked_l(monid);
        }

        uncmon_unlock(mon, intrs_en);
    }

    uncore_active_ctrs = 0;
    memset(&uncore_config, 0, sizeof(uncore_config));

    for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
        struct uncore_monitor *mon = &uncore_monitors[monid];
        bool remote = monid != curmonid;

        int intrs_en = uncmon_lock(mon);
        if (remote) {
#if UNCORE_PER_CLUSTER
            uncmon_program_events_locked_r(monid);
#endif /* UNCORE_PER_CLUSTER */
        } else {
            uncmon_program_events_locked_l(monid);
        }
        uncmon_unlock(mon, intrs_en);
    }
}

/*
 * Support for monotonic's mtd_enable function.
 */

static void
uncmon_set_enabled_l(unsigned int monid, bool enable)
{
    struct uncore_monitor *mon = &uncore_monitors[monid];
    int intrs_en = uncmon_lock(mon);

    if (enable) {
        uncmon_program_events_locked_l(monid);
        uncmon_set_counting_locked_l(monid, uncore_active_ctrs);
    } else {
        uncmon_set_counting_locked_l(monid, 0);
    }

    uncmon_unlock(mon, intrs_en);
}

#if UNCORE_PER_CLUSTER

static void
uncmon_set_enabled_r(unsigned int monid, bool enable)
{
    struct uncore_monitor *mon = &uncore_monitors[monid];
    int intrs_en = uncmon_lock(mon);

    if (enable) {
        uncmon_program_events_locked_r(monid);
        uncmon_set_counting_locked_r(monid, uncore_active_ctrs);
    } else {
        uncmon_set_counting_locked_r(monid, 0);
    }

    uncmon_unlock(mon, intrs_en);
}

#endif /* UNCORE_PER_CLUSTER */

static void
uncore_set_enabled(bool enable)
{
    mt_uncore_enabled = enable;

    unsigned int curmonid = uncmon_get_curid();
    for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
        if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
            uncmon_set_enabled_r(monid, enable);
#endif /* UNCORE_PER_CLUSTER */
        } else {
            uncmon_set_enabled_l(monid, enable);
        }
    }
}

/*
 * Hooks in the machine layer.
 */

static void
uncore_fiq(uint64_t upmsr)
{
    /*
     * Determine which counters overflowed.
     */
    uint64_t disable_ctr_mask = (upmsr & UPMSR_OVF_MASK) >> UPMSR_OVF_POS;
    /* should not receive interrupts from inactive counters */
    assert(!(disable_ctr_mask & ~uncore_active_ctrs));

    unsigned int monid = uncmon_get_curid();
    struct uncore_monitor *mon = &uncore_monitors[monid];

    int intrs_en = uncmon_lock(mon);

    /*
     * Disable any counters that overflowed.
     */
    uncmon_set_counting_locked_l(monid,
        uncore_active_ctrs & ~disable_ctr_mask);

    /*
     * With the overflowing counters disabled, capture their counts and reset
     * the UPMCs and their snapshots to 0.
     */
    for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
        if (UPMSR_OVF(upmsr, ctr)) {
            uncmon_update_locked(monid, monid, ctr);
            mon->um_snaps[ctr] = 0;
            uncmon_write_counter_locked_l(monid, ctr, 0);
        }
    }

    /*
     * Acknowledge the interrupt, now that any overflowed PMCs have been reset.
     */
    uncmon_clear_int_locked_l(monid);

    /*
     * Re-enable all active counters.
     */
    uncmon_set_counting_locked_l(monid, uncore_active_ctrs);

    uncmon_unlock(mon, intrs_en);
}

static void
uncore_save(void)
{
    if (!uncore_active_ctrs) {
        return;
    }

    unsigned int curmonid = uncmon_get_curid();

    for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
        struct uncore_monitor *mon = &uncore_monitors[monid];
        int intrs_en = uncmon_lock(mon);

        if (mt_uncore_enabled) {
            if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
                uncmon_set_counting_locked_r(monid, 0);
#endif /* UNCORE_PER_CLUSTER */
            } else {
                uncmon_set_counting_locked_l(monid, 0);
            }
        }

        for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
            if (uncore_active_ctrs & (1U << ctr)) {
                uncmon_update_locked(monid, curmonid, ctr);
            }
        }

        mon->um_sleeping = true;
        uncmon_unlock(mon, intrs_en);
    }
}

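/*
 * Restore the current monitor's uncore state after wake.  This runs on each
 * CPU as it comes back up (via mt_wake_per_core), so only the local monitor
 * is reprogrammed here.
 */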
static void
uncore_restore(void)
{
    if (!uncore_active_ctrs) {
        return;
    }
    unsigned int curmonid = uncmon_get_curid();

    struct uncore_monitor *mon = &uncore_monitors[curmonid];
    int intrs_en = uncmon_lock(mon);
    if (!mon->um_sleeping) {
        goto out;
    }

    for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
        if (uncore_active_ctrs & (1U << ctr)) {
            uncmon_write_counter_locked_l(curmonid, ctr, mon->um_snaps[ctr]);
        }
    }
    uncmon_program_events_locked_l(curmonid);
    uncmon_init_locked_l(curmonid);
    mon->um_sleeping = false;

out:
    uncmon_unlock(mon, intrs_en);
}

#endif /* HAS_UNCORE_CTRS */

#pragma mark common hooks

void
mt_early_init(void)
{
    topology_info = ml_get_topology_info();
}

void
mt_cpu_idle(cpu_data_t *cpu)
{
    core_idle(cpu);
}

void
mt_cpu_run(cpu_data_t *cpu)
{
    struct mt_cpu *mtc;

    assert(cpu != NULL);
    assert(ml_get_interrupts_enabled() == FALSE);

    mtc = &cpu->cpu_monotonic;

    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        mt_core_set_snap(i, mtc->mtc_snaps[i]);
    }

    /* re-enable the counters */
    core_init_execution_modes();

    core_set_enabled();
}

void
mt_cpu_down(cpu_data_t *cpu)
{
    mt_cpu_idle(cpu);
}

void
mt_cpu_up(cpu_data_t *cpu)
{
    mt_cpu_run(cpu);
}

void
mt_sleep(void)
{
#if HAS_UNCORE_CTRS
    uncore_save();
#endif /* HAS_UNCORE_CTRS */
}

void
mt_wake_per_core(void)
{
#if HAS_UNCORE_CTRS
    if (mt_uncore_initted) {
        uncore_restore();
    }
#endif /* HAS_UNCORE_CTRS */
}

uint64_t
mt_count_pmis(void)
{
    uint64_t npmis = 0;
    for (unsigned int i = 0; i < topology_info->num_cpus; i++) {
        cpu_data_t *cpu = (cpu_data_t *)CpuDataEntries[topology_info->cpus[i].cpu_id].cpu_data_vaddr;
        npmis += cpu->cpu_monotonic.mtc_npmis;
    }
    return npmis;
}

static void
mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
{
    assert(cpu != NULL);
    assert(ml_get_interrupts_enabled() == FALSE);

    __builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);
    /*
     * Ensure the CPMU has flushed any increments at this point, so PMSR is up
     * to date.
     */
    __builtin_arm_isb(ISB_SY);

    cpu->cpu_monotonic.mtc_npmis += 1;
    cpu->cpu_stat.pmi_cnt_wake += 1;

#if MONOTONIC_DEBUG
    if (!PMCR0_PMI(pmcr0)) {
        kprintf("monotonic: mt_cpu_pmi but no PMI (PMCR0 = %#llx)\n",
            pmcr0);
    }
#else /* MONOTONIC_DEBUG */
#pragma unused(pmcr0)
#endif /* !MONOTONIC_DEBUG */

    uint64_t pmsr = __builtin_arm_rsr64("PMSR_EL1");

#if MONOTONIC_DEBUG
    printf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx\n",
        cpu_number(), pmsr, pmcr0);
#endif /* MONOTONIC_DEBUG */

#if MACH_ASSERT
    uint64_t handled = 0;
#endif /* MACH_ASSERT */

    /*
     * monotonic handles any fixed counter PMIs.
     */
    for (unsigned int i = 0; i < MT_CORE_NFIXED; i++) {
        if ((pmsr & PMSR_OVF(i)) == 0) {
            continue;
        }

#if MACH_ASSERT
        handled |= 1ULL << i;
#endif /* MACH_ASSERT */
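        /*
         * Accumulate the overflowed counter into the per-CPU totals and
         * re-seed it with its reset value (CTR_MAX - period while
         * microstackshots are active -- see mt_microstackshot_start_arch).
         */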
        uint64_t count = mt_cpu_update_count(cpu, i);
        cpu->cpu_monotonic.mtc_counts[i] += count;
        mt_core_set_snap(i, mt_core_reset_values[i]);
        cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];

        if (mt_microstackshots && mt_microstackshot_ctr == i) {
            bool user_mode = false;
            arm_saved_state_t *state = get_user_regs(current_thread());
            if (state) {
                user_mode = PSR64_IS_USER(get_saved_state_cpsr(state));
            }
            KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 1),
                mt_microstackshot_ctr, user_mode);
            mt_microstackshot_pmi_handler(user_mode, mt_microstackshot_ctx);
        } else if (mt_debug) {
            KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 2),
                i, count);
        }
    }

    /*
     * KPC handles the configurable counter PMIs.
     */
    for (unsigned int i = MT_CORE_NFIXED; i < CORE_NCTRS; i++) {
        if (pmsr & PMSR_OVF(i)) {
#if MACH_ASSERT
            handled |= 1ULL << i;
#endif /* MACH_ASSERT */
            extern void kpc_pmi_handler(unsigned int ctr);
            kpc_pmi_handler(i);
        }
    }

#if MACH_ASSERT
    uint64_t pmsr_after_handling = __builtin_arm_rsr64("PMSR_EL1");
    if (pmsr_after_handling != 0) {
        unsigned int first_ctr_ovf = __builtin_ffsll(pmsr_after_handling) - 1;
        uint64_t count = 0;
        const char *extra = "";
        if (first_ctr_ovf >= CORE_NCTRS) {
            extra = " (invalid counter)";
        } else {
            count = mt_core_snap(first_ctr_ovf);
        }

        panic("monotonic: PMI status not cleared on exit from handler, "
            "PMSR = 0x%llx HANDLE -> -> 0x%llx, handled 0x%llx, "
            "PMCR0 = 0x%llx, PMC%d = 0x%llx%s", pmsr, pmsr_after_handling,
            handled, __builtin_arm_rsr64("PMCR0_EL1"), first_ctr_ovf, count, extra);
    }
#endif /* MACH_ASSERT */

    core_set_enabled();
}

#if CPMU_AIC_PMI
void
mt_cpmu_aic_pmi(cpu_id_t source)
{
    struct cpu_data *curcpu = getCpuDatap();
    if (source != curcpu->interrupt_nub) {
        panic("monotonic: PMI from IOCPU %p delivered to %p", source,
            curcpu->interrupt_nub);
    }
    mt_cpu_pmi(curcpu, __builtin_arm_rsr64("PMCR0_EL1"));
}
#endif /* CPMU_AIC_PMI */

void
mt_fiq(void *cpu, uint64_t pmcr0, uint64_t upmsr)
{
#if CPMU_AIC_PMI
#pragma unused(cpu, pmcr0)
#else /* CPMU_AIC_PMI */
    mt_cpu_pmi(cpu, pmcr0);
#endif /* !CPMU_AIC_PMI */

#if HAS_UNCORE_CTRS
    uncore_fiq(upmsr);
#else /* HAS_UNCORE_CTRS */
#pragma unused(upmsr)
#endif /* !HAS_UNCORE_CTRS */
}

static uint32_t mt_xc_sync;

static void
mt_microstackshot_start_remote(__unused void *arg)
{
    cpu_data_t *cpu = getCpuDatap();

    __builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);

    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        uint64_t count = mt_cpu_update_count(cpu, i);
        cpu->cpu_monotonic.mtc_counts[i] += count;
        mt_core_set_snap(i, mt_core_reset_values[i]);
        cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];
    }

    core_set_enabled();

    if (os_atomic_dec(&mt_xc_sync, relaxed) == 0) {
        thread_wakeup((event_t)&mt_xc_sync);
    }
}

int
mt_microstackshot_start_arch(uint64_t period)
{
    uint64_t reset_value = 0;
    int ovf = os_sub_overflow(CTR_MAX, period, &reset_value);
    if (ovf) {
        return ERANGE;
    }
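    /*
     * The counter raises a PMI when it overflows past CTR_MAX, so seeding it
     * with CTR_MAX - period should make the next PMI arrive after roughly
     * `period` more events.
     */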

    mt_core_reset_values[mt_microstackshot_ctr] = reset_value;
    cpu_broadcast_xcall(&mt_xc_sync, TRUE, mt_microstackshot_start_remote,
        mt_microstackshot_start_remote /* cannot pass NULL */);
    return 0;
}

#pragma mark dev nodes

struct mt_device mt_devices[] = {
    [0] = {
        .mtd_name = "core",
        .mtd_init = core_init,
    },
#if HAS_UNCORE_CTRS
    [1] = {
        .mtd_name = "uncore",
        .mtd_init = uncore_init,
        .mtd_add = uncore_add,
        .mtd_reset = uncore_reset,
        .mtd_enable = uncore_set_enabled,
        .mtd_read = uncore_read,

        .mtd_ncounters = UNCORE_NCTRS,
    }
#endif /* HAS_UNCORE_CTRS */
};

static_assert(
    (sizeof(mt_devices) / sizeof(mt_devices[0])) == MT_NDEVS,
    "MT_NDEVS macro should be same as the length of mt_devices");