1 | /* |
2 | * Copyright (c) 2017-2019 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <arm/cpu_data_internal.h> | |
30 | #include <arm/machine_routines.h> | |
31 | #include <arm64/monotonic.h> | |
d9a64523 | 32 | #include <kern/assert.h> |
33 | #include <kern/debug.h> /* panic */ |
34 | #include <kern/monotonic.h> | |
0a7de745 | 35 | #include <machine/atomic.h> |
5ba3f43e | 36 | #include <machine/limits.h> /* CHAR_BIT */ |
37 | #include <os/overflow.h> |
38 | #include <pexpert/arm64/board_config.h> | |
39 | #include <pexpert/device_tree.h> /* DTFindEntry */ | |
40 | #include <pexpert/pexpert.h> | |
41 | #include <stdatomic.h> |
42 | #include <stdint.h> | |
43 | #include <string.h> | |
44 | #include <sys/errno.h> | |
45 | #include <sys/monotonic.h> | |
46 | |
47 | /* | |
48 | * On MACH_ASSERT kernels, ensure that control registers read back the value | |
49 | * that was written to them. | |
50 | * | |
51 | * A static inline function cannot be used here: the register name must pass | |
52 | * through the builtin, which requires a constant string as its first argument | |
53 | * because system registers are encoded as immediates in MSR/MRS instructions. | |
54 | */ | |
55 | #if MACH_ASSERT | |
56 | #define CTRL_REG_SET(reg, val) do { \ | |
57 | __builtin_arm_wsr64((reg), (val)); \ | |
58 | uint64_t __check_reg = __builtin_arm_rsr64((reg)); \ | |
59 | if (__check_reg != (val)) { \ | |
60 | panic("value written to %s was not read back (wrote %llx, read %llx)", \ | |
61 | #reg, (val), __check_reg); \ | |
62 | } \ | |
63 | } while (0) | |
64 | #else /* MACH_ASSERT */ | |
65 | #define CTRL_REG_SET(reg, val) __builtin_arm_wsr64((reg), (val)) | |
66 | #endif /* MACH_ASSERT */ | |
67 | |
68 | #pragma mark core counters | |
69 | ||
70 | bool mt_core_supported = true; | |
71 | |
72 | /* | |
73 | * PMC[0-1] are the 48-bit fixed counters -- PMC0 is cycles and PMC1 is | |
74 | * instructions (see arm64/monotonic.h). | |
75 | * | |
76 | * PMC2+ are currently handled by kpc. | |
77 | */ | |
78 | ||
79 | #define PMC0 "s3_2_c15_c0_0" | |
80 | #define PMC1 "s3_2_c15_c1_0" | |
81 | #define PMC2 "s3_2_c15_c2_0" | |
82 | #define PMC3 "s3_2_c15_c3_0" | |
83 | #define PMC4 "s3_2_c15_c4_0" | |
84 | #define PMC5 "s3_2_c15_c5_0" | |
85 | #define PMC6 "s3_2_c15_c6_0" | |
86 | #define PMC7 "s3_2_c15_c7_0" | |
87 | #define PMC8 "s3_2_c15_c9_0" | |
88 | #define PMC9 "s3_2_c15_c10_0" | |
89 | ||
90 | #define CTR_MAX ((UINT64_C(1) << 47) - 1) |
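/*
 * CTR_MAX is 2^47 - 1 = 0x00007fffffffffff, the largest count that monotonic
 * programs into a 48-bit fixed counter.  A counter seeded with CTR_MAX - period
 * overflows -- and delivers a PMI -- after roughly `period` more events (see
 * mt_microstackshot_start_arch below).
 */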
91 | ||
92 | #define CYCLES 0 |
93 | #define INSTRS 1 | |
94 | ||
95 | /* | |
96 | * PMC0's offset into a core's PIO range. | |
97 | * | |
98 | * This allows cores to remotely query another core's counters. | |
99 | */ | |
100 | ||
101 | #define PIO_PMC0_OFFSET (0x200) | |
102 | ||
103 | /* | |
104 | * The offset of the counter in the configuration registers. Post-Hurricane | |
105 | * devices have additional counters that need a larger shift than the original | |
106 | * counters. | |
107 | * | |
108 | * XXX For now, just support the lower-numbered counters. | |
109 | */ | |
110 | #define CTR_POS(CTR) (CTR) | |
111 | ||
112 | /* | |
113 | * PMCR0 is the main control register for the performance monitor. It | |
114 | * controls whether the counters are enabled, how they deliver interrupts, and | |
115 | * other features. | |
116 | */ | |
117 | ||
#define PMCR0 "s3_1_c15_c0_0" /* core PMU control register; encoding assumed to follow the PMCR1-PMCR4 pattern below */
118 | #define PMCR0_CTR_EN(CTR) (UINT64_C(1) << CTR_POS(CTR)) |
119 | #define PMCR0_FIXED_EN (PMCR0_CTR_EN(CYCLES) | PMCR0_CTR_EN(INSTRS)) | |
120 | /* how interrupts are delivered on a PMI */ | |
121 | enum { | |
122 | PMCR0_INTGEN_OFF = 0, | |
123 | PMCR0_INTGEN_PMI = 1, | |
124 | PMCR0_INTGEN_AIC = 2, | |
125 | PMCR0_INTGEN_HALT = 3, | |
126 | PMCR0_INTGEN_FIQ = 4, | |
127 | }; | |
128 | #define PMCR0_INTGEN_SET(INT) ((uint64_t)(INT) << 8) | |
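/*
 * For example, PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ) places 4 into bits [10:8],
 * i.e. 0x400, selecting FIQ delivery for PMIs.
 */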
129 | |
130 | #if CPMU_AIC_PMI | |
131 | #define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_AIC) | |
132 | #else /* CPMU_AIC_PMI */ | |
d9a64523 | 133 | #define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ) |
134 | #endif /* !CPMU_AIC_PMI */ |
135 | ||
/* PMI-asserted status flag, cleared by core_set_enabled() before counting resumes (assumed to be bit 11, just below the per-counter PMI enables) */
#define PMCR0_PMAI (UINT64_C(1) << 11)
136 | #define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (12 + CTR_POS(CTR))) |
d9a64523 | 137 | /* fixed counters are always counting */ |
5ba3f43e | 138 | #define PMCR0_PMI_INIT (PMCR0_PMI_EN(CYCLES) | PMCR0_PMI_EN(INSTRS)) |
d9a64523 | 139 | /* disable counting on a PMI */ |
140 | #define PMCR0_DISCNT_EN (UINT64_C(1) << 20) |
141 | /* block PMIs until ERET retires */ | |
142 | #define PMCR0_WFRFE_EN (UINT64_C(1) << 22) | |
143 | /* count global (not just core-local) L2C events */ | |
144 | #define PMCR0_L2CGLOBAL_EN (UINT64_C(1) << 23) | |
145 | /* user mode access to configuration registers */ | |
146 | #define PMCR0_USEREN_EN (UINT64_C(1) << 30) | |
147 | ||
148 | #define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT | PMCR0_DISCNT_EN) |
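/*
 * On kernels without CPMU_AIC_PMI, PMCR0_INIT works out to 0x103400: FIQ
 * interrupt generation (4 << 8), PMI enables for PMC0 and PMC1 (bits 12 and
 * 13), and disable-counting-on-PMI (bit 20).  The counter enable bits
 * themselves (PMCR0_FIXED_EN) are ORed in later by core_set_enabled().
 */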
149 | ||
150 | /* | |
151 | * PMCR1 controls which execution modes count events. | |
152 | */ | |
153 | ||
154 | #define PMCR1 "s3_1_c15_c1_0" | |
155 | ||
156 | #define PMCR1_EL0A32_EN(CTR) (UINT64_C(1) << (0 + CTR_POS(CTR))) | |
157 | #define PMCR1_EL0A64_EN(CTR) (UINT64_C(1) << (8 + CTR_POS(CTR))) | |
158 | #define PMCR1_EL1A64_EN(CTR) (UINT64_C(1) << (16 + CTR_POS(CTR))) | |
159 | /* PMCR1_EL3A64 is not supported on systems with no monitor */ | |
160 | #if defined(APPLEHURRICANE) | |
161 | #define PMCR1_EL3A64_EN(CTR) UINT64_C(0) | |
162 | #else | |
163 | #define PMCR1_EL3A64_EN(CTR) (UINT64_C(1) << (24 + CTR_POS(CTR))) | |
164 | #endif | |
165 | #define PMCR1_ALL_EN(CTR) (PMCR1_EL0A32_EN(CTR) | PMCR1_EL0A64_EN(CTR) | \ | |
0a7de745 | 166 | PMCR1_EL1A64_EN(CTR) | PMCR1_EL3A64_EN(CTR)) |
167 | |
168 | /* fixed counters always count in all modes */ | |
169 | #define PMCR1_INIT (PMCR1_ALL_EN(CYCLES) | PMCR1_ALL_EN(INSTRS)) | |
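/*
 * The mode fields are spaced eight bits apart, so for the two fixed counters
 * PMCR1_INIT works out to 0x03030303 on parts with the EL3 field -- counters
 * 0 and 1 enabled in EL0 AArch32, EL0 AArch64, EL1 AArch64, and EL3 AArch64
 * -- and 0x00030303 on APPLEHURRICANE, where the EL3 bits stay zero.
 */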
170 | ||
171 | static inline void | |
172 | core_init_execution_modes(void) | |
173 | { | |
174 | uint64_t pmcr1; | |
175 | ||
176 | pmcr1 = __builtin_arm_rsr64(PMCR1); | |
177 | pmcr1 |= PMCR1_INIT; | |
178 | __builtin_arm_wsr64(PMCR1, pmcr1); | |
179 | } | |
180 | ||
181 | /* |
182 | * PMCR2 controls watchpoint registers. | |
183 | * | |
184 | * PMCR3 controls breakpoints and address matching. | |
185 | * | |
186 | * PMCR4 controls opcode matching. | |
187 | */ | |
188 | ||
189 | #define PMCR2 "s3_1_c15_c2_0" | |
190 | #define PMCR3 "s3_1_c15_c3_0" | |
191 | #define PMCR4 "s3_1_c15_c4_0" | |
192 | ||
0a7de745 | 193 | #define PMSR "s3_1_c15_c13_0" |
5ba3f43e | 194 | |
0a7de745 | 195 | #define PMSR_OVF(CTR) (1ULL << (CTR)) |
196 | |
197 | static int | |
d9a64523 | 198 | core_init(__unused mt_device_t dev) |
199 | { |
200 | /* the dev node interface to the core counters is still unsupported */ | |
201 | return ENOTSUP; | |
202 | } | |
203 | ||
204 | struct mt_cpu * | |
205 | mt_cur_cpu(void) | |
206 | { | |
207 | return &getCpuDatap()->cpu_monotonic; | |
208 | } | |
209 | ||
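/*
 * As with the control registers above, the PMC register names must be
 * compile-time constant strings for the MRS/MSR builtins, so counter reads
 * and writes are dispatched through a switch instead of indexing by counter
 * number.
 */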
210 | uint64_t | |
211 | mt_core_snap(unsigned int ctr) | |
212 | { | |
213 | switch (ctr) { | |
214 | case 0: | |
215 | return __builtin_arm_rsr64(PMC0); | |
216 | case 1: | |
217 | return __builtin_arm_rsr64(PMC1); | |
218 | default: | |
219 | panic("monotonic: invalid core counter read: %u", ctr); | |
d9a64523 | 220 | __builtin_unreachable(); |
221 | } |
222 | } | |
223 | ||
224 | void | |
225 | mt_core_set_snap(unsigned int ctr, uint64_t count) | |
226 | { | |
227 | switch (ctr) { | |
228 | case 0: | |
229 | __builtin_arm_wsr64(PMC0, count); | |
230 | break; | |
231 | case 1: | |
232 | __builtin_arm_wsr64(PMC1, count); | |
233 | break; | |
234 | default: | |
235 | panic("monotonic: invalid core counter %u write %llu", ctr, count); | |
d9a64523 | 236 | __builtin_unreachable(); |
237 | } |
238 | } | |
239 | ||
240 | static void | |
241 | core_set_enabled(void) | |
242 | { | |
0a7de745 | 243 | uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0); |
5ba3f43e | 244 | pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN; |
0a7de745 | 245 | pmcr0 &= ~PMCR0_PMAI; |
5ba3f43e | 246 | __builtin_arm_wsr64(PMCR0, pmcr0); |
247 | #if MACH_ASSERT |
248 | /* | |
249 | * Only check for the values that were ORed in. | |
250 | */ | |
251 | uint64_t pmcr0_check = __builtin_arm_rsr64(PMCR0); | |
252 | if (!(pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN))) { | |
253 | panic("monotonic: hardware ignored enable (read %llx)", | |
254 | pmcr0_check); | |
255 | } | |
256 | #endif /* MACH_ASSERT */ | |
257 | } |
258 | ||
259 | static void | |
260 | core_idle(__unused cpu_data_t *cpu) | |
261 | { | |
262 | assert(cpu != NULL); | |
263 | assert(ml_get_interrupts_enabled() == FALSE); | |
264 | ||
265 | #if DEBUG | |
266 | uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0); | |
267 | if ((pmcr0 & PMCR0_FIXED_EN) == 0) { | |
0a7de745 | 268 | panic("monotonic: counters disabled before idling, pmcr0 = 0x%llx\n", pmcr0); |
269 | } |
270 | uint64_t pmcr1 = __builtin_arm_rsr64(PMCR1); | |
271 | if ((pmcr1 & PMCR1_INIT) == 0) { | |
0a7de745 | 272 | panic("monotonic: counter modes disabled before idling, pmcr1 = 0x%llx\n", pmcr1); |
273 | } |
274 | #endif /* DEBUG */ | |
275 | ||
276 | /* disable counters before updating */ | |
277 | __builtin_arm_wsr64(PMCR0, PMCR0_INIT); | |
278 | ||
279 | mt_update_fixed_counts(); | |
280 | } | |
281 | ||
282 | #pragma mark uncore performance monitor |
283 | ||
284 | #if HAS_UNCORE_CTRS |
285 | ||
286 | static bool mt_uncore_initted = false; | |
287 | ||
288 | /* | |
289 | * Uncore Performance Monitor | |
290 | * | |
291 | * Uncore performance monitors provide event-counting for the last-level caches | |
292 | * (LLCs). Each LLC has its own uncore performance monitor, which can only be | |
293 | * accessed by cores that use that LLC. Like the core performance monitoring | |
294 | * unit, uncore counters are configured globally. If there is more than one | |
295 | * LLC on the system, PIO reads must be used to satisfy uncore requests (using | |
296 | * the `_r` remote variants of the access functions). Otherwise, local MSRs | |
297 | * suffice (using the `_l` local variants of the access functions). | |
298 | */ | |
299 | ||
300 | #if UNCORE_PER_CLUSTER | |
301 | static vm_size_t cpm_impl_size = 0; | |
302 | static uintptr_t cpm_impl[__ARM_CLUSTER_COUNT__] = {}; | |
303 | static uintptr_t cpm_impl_phys[__ARM_CLUSTER_COUNT__] = {}; | |
304 | #endif /* UNCORE_PER_CLUSTER */ | |
305 | ||
306 | #if UNCORE_VERSION >= 2 | |
307 | /* | |
308 | * V2 uncore monitors feature a CTI mechanism -- the second bit of UPMSR is | |
309 | * used to track if a CTI has been triggered due to an overflow. | |
310 | */ | |
311 | #define UPMSR_OVF_POS 2 | |
312 | #else /* UNCORE_VERSION >= 2 */ | |
313 | #define UPMSR_OVF_POS 1 | |
314 | #endif /* UNCORE_VERSION < 2 */ | |
315 | #define UPMSR_OVF(R, CTR) ((R) >> ((CTR) + UPMSR_OVF_POS) & 0x1) | |
316 | #define UPMSR_OVF_MASK (((UINT64_C(1) << UNCORE_NCTRS) - 1) << UPMSR_OVF_POS) | |
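/*
 * For example, assuming a version-1 monitor (UPMSR_OVF_POS == 1) with 16
 * uncore counters, UPMSR_OVF_MASK is 0x1fffe: one overflow bit per counter,
 * starting at bit 1 of UPMSR.
 */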
317 | ||
318 | #define UPMPCM "s3_7_c15_c5_4" | |
319 | #define UPMPCM_CORE(ID) (UINT64_C(1) << (ID)) | |
320 | ||
321 | /* | |
322 | * The uncore_pmi_mask is a bitmask of CPUs that receive uncore PMIs. It's | |
323 | * initialized by uncore_init and controllable by the uncore_pmi_mask boot-arg. | |
324 | */ | |
325 | static int32_t uncore_pmi_mask = 0; | |
326 | ||
327 | /* | |
328 | * The uncore_active_ctrs is a bitmask of uncore counters that are currently | |
329 | * requested. | |
330 | */ | |
331 | static uint16_t uncore_active_ctrs = 0; | |
332 | static_assert(sizeof(uncore_active_ctrs) * CHAR_BIT >= UNCORE_NCTRS, | |
333 | "counter mask should fit the full range of counters"); | |
334 | ||
335 | /* | |
336 | * mt_uncore_enabled is true when any uncore counters are active. | |
337 | */ | |
338 | bool mt_uncore_enabled = false; | |
339 | ||
340 | /* | |
341 | * Each uncore unit has its own monitor, corresponding to the memory hierarchy | |
342 | * of the LLCs. | |
343 | */ | |
344 | #if UNCORE_PER_CLUSTER | |
345 | #define UNCORE_NMONITORS (__ARM_CLUSTER_COUNT__) | |
346 | #else /* UNCORE_PER_CLUSTER */ | |
347 | #define UNCORE_NMONITORS (1) | |
348 | #endif /* !UNCORE_PER_CLUSTER */ | |
349 | ||
350 | /* | |
351 | * The uncore_events are the event configurations for each uncore counter -- as | |
352 | * a union to make it easy to program the hardware registers. | |
353 | */ | |
354 | static struct uncore_config { | |
355 | union { | |
356 | uint8_t uce_ctrs[UNCORE_NCTRS]; | |
357 | uint64_t uce_regs[UNCORE_NCTRS / 8]; | |
358 | } uc_events; | |
359 | union { | |
360 | uint16_t uccm_masks[UNCORE_NCTRS]; | |
361 | uint64_t uccm_regs[UNCORE_NCTRS / 4]; | |
362 | } uc_cpu_masks[UNCORE_NMONITORS]; | |
363 | } uncore_config; | |
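/*
 * The unions rely on the kernel running little-endian: the 8-bit event
 * selector for counter N occupies byte N of the packed uce_regs words (e.g.
 * uce_ctrs[3] is bits [31:24] of uce_regs[0]), and the 16-bit per-counter CPU
 * masks pack four to a word.  uncmon_program_events_locked_l() writes these
 * packed words straight into the UPMESR and UPMECM registers.
 */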
364 | ||
365 | static struct uncore_monitor { | |
366 | /* | |
367 | * The last snapshot of each of the hardware counter values. | |
368 | */ | |
369 | uint64_t um_snaps[UNCORE_NCTRS]; | |
370 | ||
371 | /* | |
372 | * The accumulated counts for each counter. | |
373 | */ | |
374 | uint64_t um_counts[UNCORE_NCTRS]; | |
375 | ||
376 | /* | |
377 | * Protects accessing the hardware registers and fields in this structure. | |
378 | */ | |
379 | lck_spin_t um_lock; | |
380 | ||
381 | /* | |
382 | * Whether this monitor needs its registers restored after wake. | |
383 | */ | |
384 | bool um_sleeping; | |
385 | } uncore_monitors[UNCORE_NMONITORS]; | |
386 | ||
387 | static unsigned int | |
388 | uncmon_get_curid(void) | |
389 | { | |
390 | #if UNCORE_PER_CLUSTER | |
391 | return cpu_cluster_id(); | |
392 | #else /* UNCORE_PER_CLUSTER */ | |
393 | return 0; | |
394 | #endif /* !UNCORE_PER_CLUSTER */ | |
395 | } | |
396 | ||
397 | /* | |
398 | * Per-monitor locks are required to prevent races with the PMI handlers, not | |
399 | * from other CPUs that are configuring (those are serialized with monotonic's | |
400 | * per-device lock). | |
401 | */ | |
402 | ||
403 | static int | |
404 | uncmon_lock(struct uncore_monitor *mon) | |
405 | { | |
406 | int intrs_en = ml_set_interrupts_enabled(FALSE); | |
407 | lck_spin_lock(&mon->um_lock); | |
408 | return intrs_en; | |
409 | } | |
410 | ||
411 | static void | |
412 | uncmon_unlock(struct uncore_monitor *mon, int intrs_en) | |
413 | { | |
414 | lck_spin_unlock(&mon->um_lock); | |
415 | (void)ml_set_interrupts_enabled(intrs_en); | |
416 | } | |
417 | ||
418 | /* | |
419 | * Helper functions for accessing the hardware -- these require the monitor be | |
420 | * locked to prevent other CPUs' PMI handlers from making local modifications | |
421 | * or updating the counts. | |
422 | */ | |
423 | ||
424 | #if UNCORE_VERSION >= 2 | |
425 | #define UPMCR0_INTEN_POS 20 | |
426 | #define UPMCR0_INTGEN_POS 16 | |
427 | #else /* UNCORE_VERSION >= 2 */ | |
428 | #define UPMCR0_INTEN_POS 12 | |
429 | #define UPMCR0_INTGEN_POS 8 | |
430 | #endif /* UNCORE_VERSION < 2 */ | |
431 | enum { | |
432 | UPMCR0_INTGEN_OFF = 0, | |
433 | /* fast PMIs are only supported on core CPMU */ | |
434 | UPMCR0_INTGEN_AIC = 2, | |
435 | UPMCR0_INTGEN_HALT = 3, | |
436 | UPMCR0_INTGEN_FIQ = 4, | |
437 | }; | |
438 | /* always enable interrupts for all counters */ | |
439 | #define UPMCR0_INTEN (((1ULL << UNCORE_NCTRS) - 1) << UPMCR0_INTEN_POS) | |
440 | /* route uncore PMIs through the FIQ path */ | |
441 | #define UPMCR0_INIT (UPMCR0_INTEN | (UPMCR0_INTGEN_FIQ << UPMCR0_INTGEN_POS)) | |
442 | ||
443 | /* | |
444 | * Turn counting on for the counters set in `enctrmask` and off for the rest. | |
445 | */ | |
446 | static inline void | |
447 | uncmon_set_counting_locked_l(__unused unsigned int monid, uint64_t enctrmask) | |
448 | { | |
449 | /* | |
450 | * UPMCR0 controls which counters are enabled and how interrupts are generated | |
451 | * for overflows. | |
452 | */ | |
453 | #define UPMCR0 "s3_7_c15_c0_4" | |
454 | __builtin_arm_wsr64(UPMCR0, UPMCR0_INIT | enctrmask); | |
455 | } | |
456 | ||
457 | #if UNCORE_PER_CLUSTER | |
458 | ||
459 | /* | |
460 | * Turn counting on for the counters set in `enctrmask` and off for the rest. | |
461 | */ | |
462 | static inline void | |
463 | uncmon_set_counting_locked_r(unsigned int monid, uint64_t enctrmask) | |
464 | { | |
465 | const uintptr_t upmcr0_offset = 0x4180; | |
466 | *(uint64_t *)(cpm_impl[monid] + upmcr0_offset) = UPMCR0_INIT | enctrmask; | |
467 | } | |
468 | ||
469 | #endif /* UNCORE_PER_CLUSTER */ | |
470 | ||
471 | /* | |
472 | * The uncore performance monitoring counters (UPMCs) are 48-bits wide. The | |
473 | * high bit is an overflow bit, triggering a PMI, providing 47 usable bits. | |
474 | */ | |
475 | ||
476 | #define UPMC_MAX ((UINT64_C(1) << 48) - 1) | |
477 | ||
478 | /* | |
479 | * The `__builtin_arm_{r,w}sr` functions require constant strings, since the | |
480 | * MSR/MRS instructions encode the registers as immediates. Otherwise, this | |
481 | * would be indexing into an array of strings. | |
482 | */ | |
483 | ||
484 | #define UPMC0 "s3_7_c15_c7_4" | |
485 | #define UPMC1 "s3_7_c15_c8_4" | |
486 | #define UPMC2 "s3_7_c15_c9_4" | |
487 | #define UPMC3 "s3_7_c15_c10_4" | |
488 | #define UPMC4 "s3_7_c15_c11_4" | |
489 | #define UPMC5 "s3_7_c15_c12_4" | |
490 | #define UPMC6 "s3_7_c15_c13_4" | |
491 | #define UPMC7 "s3_7_c15_c14_4" | |
492 | #if UNCORE_NCTRS > 8 | |
493 | #define UPMC8 "s3_7_c15_c0_5" | |
494 | #define UPMC9 "s3_7_c15_c1_5" | |
495 | #define UPMC10 "s3_7_c15_c2_5" | |
496 | #define UPMC11 "s3_7_c15_c3_5" | |
497 | #define UPMC12 "s3_7_c15_c4_5" | |
498 | #define UPMC13 "s3_7_c15_c5_5" | |
499 | #define UPMC14 "s3_7_c15_c6_5" | |
500 | #define UPMC15 "s3_7_c15_c7_5" | |
501 | #endif /* UNCORE_NCTRS > 8 */ | |
502 | ||
503 | #define UPMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \ | |
504 | X(6, A); X(7, A) | |
505 | #if UNCORE_NCTRS <= 8 | |
506 | #define UPMC_ALL(X, A) UPMC_0_7(X, A) | |
507 | #else /* UNCORE_NCTRS <= 8 */ | |
508 | #define UPMC_8_15(X, A) X(8, A); X(9, A); X(10, A); X(11, A); X(12, A); \ | |
509 | X(13, A); X(14, A); X(15, A) | |
510 | #define UPMC_ALL(X, A) UPMC_0_7(X, A); UPMC_8_15(X, A) | |
511 | #endif /* UNCORE_NCTRS > 8 */ | |
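/*
 * UPMC_ALL is an X-macro: UPMC_ALL(UPMC_RD, 0) in uncmon_read_counter_locked_l
 * below expands to "case (0): return __builtin_arm_rsr64(UPMC0); case (1):
 * return __builtin_arm_rsr64(UPMC1); ..." for every counter, sidestepping the
 * constant-string requirement described above.
 */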
512 | ||
513 | static inline uint64_t | |
514 | uncmon_read_counter_locked_l(__unused unsigned int monid, unsigned int ctr) | |
515 | { | |
516 | assert(ctr < UNCORE_NCTRS); | |
517 | switch (ctr) { | |
518 | #define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(UPMC ## CTR) | |
519 | UPMC_ALL(UPMC_RD, 0); | |
520 | #undef UPMC_RD | |
521 | default: | |
522 | panic("monotonic: invalid counter read %u", ctr); | |
523 | __builtin_unreachable(); | |
524 | } | |
525 | } | |
526 | ||
527 | static inline void | |
528 | uncmon_write_counter_locked_l(__unused unsigned int monid, unsigned int ctr, | |
529 | uint64_t count) | |
530 | { | |
531 | assert(count < UPMC_MAX); | |
532 | assert(ctr < UNCORE_NCTRS); | |
533 | switch (ctr) { | |
534 | #define UPMC_WR(CTR, COUNT) case (CTR): \ | |
535 | return __builtin_arm_wsr64(UPMC ## CTR, (COUNT)) | |
536 | UPMC_ALL(UPMC_WR, count); | |
537 | #undef UPMC_WR | |
538 | default: | |
539 | panic("monotonic: invalid counter write %u", ctr); | |
540 | } | |
541 | } | |
542 | ||
543 | #if UNCORE_PER_CLUSTER | |
544 | ||
545 | static const uint8_t clust_offs[__ARM_CLUSTER_COUNT__] = CPU_CLUSTER_OFFSETS; | |
546 | ||
547 | uintptr_t upmc_offs[UNCORE_NCTRS] = { | |
548 | [0] = 0x4100, [1] = 0x4248, [2] = 0x4110, [3] = 0x4250, [4] = 0x4120, | |
549 | [5] = 0x4258, [6] = 0x4130, [7] = 0x4260, [8] = 0x4140, [9] = 0x4268, | |
550 | [10] = 0x4150, [11] = 0x4270, [12] = 0x4160, [13] = 0x4278, | |
551 | [14] = 0x4170, [15] = 0x4280, | |
552 | }; | |
553 | ||
554 | static inline uint64_t | |
555 | uncmon_read_counter_locked_r(unsigned int mon_id, unsigned int ctr) | |
556 | { | |
557 | assert(mon_id < __ARM_CLUSTER_COUNT__); | |
558 | assert(ctr < UNCORE_NCTRS); | |
559 | return *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]); | |
560 | } | |
561 | ||
562 | static inline void | |
563 | uncmon_write_counter_locked_r(unsigned int mon_id, unsigned int ctr, | |
564 | uint64_t count) | |
565 | { | |
566 | assert(count < UPMC_MAX); | |
567 | assert(ctr < UNCORE_NCTRS); | |
568 | assert(mon_id < __ARM_CLUSTER_COUNT__); | |
569 | *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]) = count; | |
570 | } | |
571 | ||
572 | #endif /* UNCORE_PER_CLUSTER */ | |
573 | ||
574 | static inline void | |
575 | uncmon_update_locked(unsigned int monid, unsigned int curid, unsigned int ctr) | |
576 | { | |
577 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
578 | uint64_t snap = 0; | |
579 | if (curid == monid) { | |
580 | snap = uncmon_read_counter_locked_l(monid, ctr); | |
581 | } else { | |
582 | #if UNCORE_PER_CLUSTER | |
583 | snap = uncmon_read_counter_locked_r(monid, ctr); | |
584 | #endif /* UNCORE_PER_CLUSTER */ | |
585 | } | |
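/*
 * Accumulate the delta since the last snapshot: if the previous snapshot was
 * 1000 and the hardware now reads 1800, 800 is added to um_counts[ctr] and
 * the snapshot becomes 1800.
 */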
586 | /* counters should increase monotonically */ | |
587 | assert(snap >= mon->um_snaps[ctr]); | |
588 | mon->um_counts[ctr] += snap - mon->um_snaps[ctr]; | |
589 | mon->um_snaps[ctr] = snap; | |
590 | } | |
591 | ||
592 | static inline void | |
593 | uncmon_program_events_locked_l(unsigned int monid) | |
594 | { | |
595 | /* | |
596 | * UPMESR[01] is the event selection register that determines which event a | |
597 | * counter will count. | |
598 | */ | |
599 | #define UPMESR0 "s3_7_c15_c1_4" | |
600 | CTRL_REG_SET(UPMESR0, uncore_config.uc_events.uce_regs[0]); | |
601 | ||
602 | #if UNCORE_NCTRS > 8 | |
603 | #define UPMESR1 "s3_7_c15_c11_5" | |
604 | CTRL_REG_SET(UPMESR1, uncore_config.uc_events.uce_regs[1]); | |
605 | #endif /* UNCORE_NCTRS > 8 */ | |
606 | ||
607 | /* | |
608 | * UPMECM[0123] are the event core masks for each counter -- whether or not | |
609 | * that counter counts events generated by an agent. These are set to all | |
610 | * ones so the uncore counters count events from all cores. | |
611 | * | |
612 | * The bits are based off the start of the cluster -- e.g. even if a core | |
613 | * has a CPU ID of 4, it might be the first CPU in a cluster. Shift the | |
614 | * registers right by the ID of the first CPU in the cluster. | |
615 | */ | |
616 | #define UPMECM0 "s3_7_c15_c3_4" | |
617 | #define UPMECM1 "s3_7_c15_c4_4" | |
618 | ||
619 | CTRL_REG_SET(UPMECM0, | |
620 | uncore_config.uc_cpu_masks[monid].uccm_regs[0]); | |
621 | CTRL_REG_SET(UPMECM1, | |
622 | uncore_config.uc_cpu_masks[monid].uccm_regs[1]); | |
623 | ||
624 | #if UNCORE_NCTRS > 8 | |
625 | #define UPMECM2 "s3_7_c15_c8_5" | |
626 | #define UPMECM3 "s3_7_c15_c9_5" | |
627 | ||
628 | CTRL_REG_SET(UPMECM2, | |
629 | uncore_config.uc_cpu_masks[monid].uccm_regs[2]); | |
630 | CTRL_REG_SET(UPMECM3, | |
631 | uncore_config.uc_cpu_masks[monid].uccm_regs[3]); | |
632 | #endif /* UNCORE_NCTRS > 8 */ | |
633 | } | |
634 | ||
635 | #if UNCORE_PER_CLUSTER | |
636 | ||
637 | static inline void | |
638 | uncmon_program_events_locked_r(unsigned int monid) | |
639 | { | |
640 | const uintptr_t upmesr_offs[2] = {[0] = 0x41b0, [1] = 0x41b8, }; | |
641 | ||
642 | for (unsigned int i = 0; i < sizeof(upmesr_offs) / sizeof(upmesr_offs[0]); | |
643 | i++) { | |
644 | *(uint64_t *)(cpm_impl[monid] + upmesr_offs[i]) = | |
645 | uncore_config.uc_events.uce_regs[i]; | |
646 | } | |
647 | ||
648 | const uintptr_t upmecm_offs[4] = { | |
649 | [0] = 0x4190, [1] = 0x4198, [2] = 0x41a0, [3] = 0x41a8, | |
650 | }; | |
651 | ||
652 | for (unsigned int i = 0; i < sizeof(upmecm_offs) / sizeof(upmecm_offs[0]); | |
653 | i++) { | |
654 | *(uint64_t *)(cpm_impl[monid] + upmecm_offs[i]) = | |
655 | uncore_config.uc_cpu_masks[monid].uccm_regs[i]; | |
656 | } | |
657 | } | |
658 | ||
659 | #endif /* UNCORE_PER_CLUSTER */ | |
660 | ||
661 | static void | |
662 | uncmon_clear_int_locked_l(__unused unsigned int monid) | |
663 | { | |
664 | __builtin_arm_wsr64(UPMSR, 0); | |
665 | } | |
666 | ||
667 | #if UNCORE_PER_CLUSTER | |
668 | ||
669 | static void | |
670 | uncmon_clear_int_locked_r(unsigned int monid) | |
671 | { | |
672 | const uintptr_t upmsr_off = 0x41c0; | |
673 | *(uint64_t *)(cpm_impl[monid] + upmsr_off) = 0; | |
674 | } | |
675 | ||
676 | #endif /* UNCORE_PER_CLUSTER */ | |
677 | ||
678 | /* | |
679 | * Get the PMI mask for the provided `monid` -- that is, the bitmap of CPUs | |
680 | * that should be sent PMIs for a particular monitor. | |
681 | */ | |
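/*
 * For a hypothetical two-cluster part with clust_offs = { 0, 4 } and
 * uncore_pmi_mask = 0x11, monitor 0 would get the mask 0x01 (only CPU 0 of
 * CPUs 0-3) and monitor 1 would get 0x10 (CPU 4).
 */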
682 | static uint64_t | |
683 | uncmon_get_pmi_mask(unsigned int monid) | |
684 | { | |
685 | uint64_t pmi_mask = uncore_pmi_mask; | |
686 | ||
687 | #if UNCORE_PER_CLUSTER | |
688 | /* | |
689 | * Set up the mask for the high bits. | |
690 | */ | |
691 | uint64_t clust_cpumask; | |
692 | if (monid == __ARM_CLUSTER_COUNT__ - 1) { | |
693 | clust_cpumask = UINT64_MAX; | |
694 | } else { | |
695 | clust_cpumask = ((1ULL << clust_offs[monid + 1]) - 1); | |
696 | } | |
697 | ||
698 | /* | |
699 | * Mask off the low bits, if necessary. | |
700 | */ | |
701 | if (clust_offs[monid] != 0) { | |
702 | clust_cpumask &= ~((1ULL << clust_offs[monid]) - 1); | |
703 | } | |
704 | ||
705 | pmi_mask &= clust_cpumask; | |
706 | #else /* UNCORE_PER_CLUSTER */ | |
707 | #pragma unused(monid) | |
708 | #endif /* !UNCORE_PER_CLUSTER */ | |
709 | ||
710 | return pmi_mask; | |
711 | } | |
712 | ||
713 | /* | |
714 | * Initialization routines for the uncore counters. | |
715 | */ | |
716 | ||
717 | static void | |
718 | uncmon_init_locked_l(unsigned int monid) | |
719 | { | |
720 | /* | |
721 | * UPMPCM defines the PMI core mask for the UPMCs -- which cores should | |
722 | * receive interrupts on overflow. | |
723 | */ | |
724 | CTRL_REG_SET(UPMPCM, uncmon_get_pmi_mask(monid)); | |
725 | uncmon_set_counting_locked_l(monid, | |
726 | mt_uncore_enabled ? uncore_active_ctrs : 0); | |
727 | } | |
728 | ||
729 | #if UNCORE_PER_CLUSTER | |
730 | ||
731 | static vm_size_t acc_impl_size = 0; | |
732 | static uintptr_t acc_impl[__ARM_CLUSTER_COUNT__] = {}; | |
733 | static uintptr_t acc_impl_phys[__ARM_CLUSTER_COUNT__] = {}; | |
734 | ||
735 | static void | |
736 | uncmon_init_locked_r(unsigned int monid) | |
737 | { | |
738 | const uintptr_t upmpcm_off = 0x1010; | |
739 | ||
740 | *(uint64_t *)(acc_impl[monid] + upmpcm_off) = uncmon_get_pmi_mask(monid); | |
741 | uncmon_set_counting_locked_r(monid, | |
742 | mt_uncore_enabled ? uncore_active_ctrs : 0); | |
743 | } | |
744 | ||
745 | #endif /* UNCORE_PER_CLUSTER */ | |
746 | ||
747 | /* | |
748 | * Initialize the uncore device for monotonic. | |
749 | */ | |
750 | static int | |
751 | uncore_init(__unused mt_device_t dev) | |
752 | { | |
753 | #if DEVELOPMENT || DEBUG | |
754 | /* | |
755 | * Development and debug kernels observe the `uncore_pmi_mask` boot-arg, | |
756 | * allowing PMIs to be routed to the CPUs present in the supplied bitmap. | |
757 | * Do some sanity checks on the value provided. | |
758 | */ | |
759 | bool parsed_arg = PE_parse_boot_argn("uncore_pmi_mask", &uncore_pmi_mask, | |
760 | sizeof(uncore_pmi_mask)); | |
761 | if (parsed_arg) { | |
762 | #if UNCORE_PER_CLUSTER | |
763 | if (__builtin_popcount(uncore_pmi_mask) != __ARM_CLUSTER_COUNT__) { | |
764 | panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask); | |
765 | } | |
766 | for (unsigned int i = 0; i < __ARM_CLUSTER_COUNT__; i++) { | |
767 | if (__builtin_popcountll(uncmon_get_pmi_mask(i)) != 1) { | |
768 | panic("monotonic: invalid uncore PMI CPU for cluster %d in mask 0x%x", | |
769 | i, uncore_pmi_mask); | |
770 | } | |
771 | } | |
772 | #else /* UNCORE_PER_CLUSTER */ | |
773 | if (__builtin_popcount(uncore_pmi_mask) != 1) { | |
774 | panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask); | |
775 | } | |
776 | #endif /* !UNCORE_PER_CLUSTER */ | |
777 | } else | |
778 | #endif /* DEVELOPMENT || DEBUG */ | |
779 | { | |
780 | #if UNCORE_PER_CLUSTER | |
781 | for (int i = 0; i < __ARM_CLUSTER_COUNT__; i++) { | |
782 | /* route to the first CPU in each cluster */ | |
783 | uncore_pmi_mask |= (1ULL << clust_offs[i]); | |
784 | } | |
785 | #else /* UNCORE_PER_CLUSTER */ | |
786 | /* arbitrarily route to core 0 */ | |
787 | uncore_pmi_mask |= 1; | |
788 | #endif /* !UNCORE_PER_CLUSTER */ | |
789 | } | |
790 | assert(uncore_pmi_mask != 0); | |
791 | ||
792 | unsigned int curmonid = uncmon_get_curid(); | |
793 | ||
794 | for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) { | |
795 | #if UNCORE_PER_CLUSTER | |
796 | cpm_impl[monid] = (uintptr_t)ml_io_map(cpm_impl_phys[monid], | |
797 | cpm_impl_size); | |
798 | assert(cpm_impl[monid] != 0); | |
799 | ||
800 | acc_impl[monid] = (uintptr_t)ml_io_map(acc_impl_phys[monid], | |
801 | acc_impl_size); | |
802 | assert(acc_impl[monid] != 0); | |
803 | #endif /* UNCORE_PER_CLUSTER */ | |
804 | ||
805 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
806 | lck_spin_init(&mon->um_lock, mt_lock_grp, NULL); | |
807 | ||
808 | int intrs_en = uncmon_lock(mon); | |
809 | if (monid != curmonid) { | |
810 | #if UNCORE_PER_CLUSTER | |
811 | uncmon_init_locked_r(monid); | |
812 | #endif /* UNCORE_PER_CLUSTER */ | |
813 | } else { | |
814 | uncmon_init_locked_l(monid); | |
815 | } | |
816 | uncmon_unlock(mon, intrs_en); | |
817 | } | |
818 | ||
819 | mt_uncore_initted = true; | |
820 | ||
821 | return 0; | |
822 | } | |
823 | ||
824 | /* | |
825 | * Support for monotonic's mtd_read function. | |
826 | */ | |
827 | ||
828 | static void | |
829 | uncmon_read_all_counters(unsigned int monid, unsigned int curmonid, | |
830 | uint64_t ctr_mask, uint64_t *counts) | |
831 | { | |
832 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
833 | ||
834 | int intrs_en = uncmon_lock(mon); | |
835 | ||
836 | for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) { | |
837 | if (ctr_mask & (1ULL << ctr)) { | |
838 | uncmon_update_locked(monid, curmonid, ctr); | |
839 | counts[ctr] = mon->um_counts[ctr]; | |
840 | } | |
841 | } | |
842 | ||
843 | uncmon_unlock(mon, intrs_en); | |
844 | } | |
845 | ||
846 | /* | |
847 | * Read all monitors' counters. | |
848 | */ | |
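/*
 * counts_out is laid out as one row of UNCORE_NCTRS entries per monitor, so
 * monitor M's counter C lands at counts_out[M * UNCORE_NCTRS + C]; callers
 * are expected to supply UNCORE_NMONITORS * UNCORE_NCTRS slots.
 */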
849 | static int | |
850 | uncore_read(uint64_t ctr_mask, uint64_t *counts_out) | |
851 | { | |
852 | assert(ctr_mask != 0); | |
853 | assert(counts_out != NULL); | |
854 | ||
855 | if (!uncore_active_ctrs) { | |
856 | return EPWROFF; | |
857 | } | |
858 | if (ctr_mask & ~uncore_active_ctrs) { | |
859 | return EINVAL; | |
860 | } | |
861 | ||
862 | unsigned int curmonid = uncmon_get_curid(); | |
863 | for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) { | |
864 | /* | |
865 | * Find this monitor's starting offset into the `counts_out` array. | |
866 | */ | |
867 | uint64_t *counts = counts_out + (UNCORE_NCTRS * monid); | |
868 | ||
869 | uncmon_read_all_counters(monid, curmonid, ctr_mask, counts); | |
870 | } | |
871 | ||
872 | return 0; | |
873 | } | |
874 | ||
875 | /* | |
876 | * Support for monotonic's mtd_add function. | |
877 | */ | |
878 | ||
879 | /* | |
880 | * Add an event to the current uncore configuration. This doesn't take effect | |
881 | * until the counters are enabled again, so there's no need to involve the | |
882 | * monitors. | |
883 | */ | |
884 | static int | |
885 | uncore_add(struct monotonic_config *config, uint32_t *ctr_out) | |
886 | { | |
887 | if (mt_uncore_enabled) { | |
888 | return EBUSY; | |
889 | } | |
890 | ||
891 | uint32_t available = ~uncore_active_ctrs & config->allowed_ctr_mask; | |
892 | ||
893 | if (available == 0) { | |
894 | return ENOSPC; | |
895 | } | |
896 | ||
897 | uint32_t valid_ctrs = (UINT32_C(1) << UNCORE_NCTRS) - 1; | |
898 | if ((available & valid_ctrs) == 0) { | |
899 | return E2BIG; | |
900 | } | |
901 | ||
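/*
 * Claim the lowest available counter that the config allows: e.g. if counters
 * 0 and 1 are already active and all counters are allowed, `available` has
 * bit 2 as its lowest set bit, ffsll() returns 3, and the event is assigned
 * to counter 2.
 */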
902 | uint32_t ctr = __builtin_ffsll(available) - 1; | |
903 | ||
904 | uncore_active_ctrs |= UINT64_C(1) << ctr; | |
905 | uncore_config.uc_events.uce_ctrs[ctr] = config->event; | |
906 | uint64_t cpu_mask = UINT64_MAX; | |
907 | if (config->cpu_mask != 0) { | |
908 | cpu_mask = config->cpu_mask; | |
909 | } | |
910 | for (int i = 0; i < UNCORE_NMONITORS; i++) { | |
911 | #if UNCORE_PER_CLUSTER | |
912 | const unsigned int shift = clust_offs[i]; | |
913 | #else /* UNCORE_PER_CLUSTER */ | |
914 | const unsigned int shift = 0; | |
915 | #endif /* !UNCORE_PER_CLUSTER */ | |
916 | uncore_config.uc_cpu_masks[i].uccm_masks[ctr] = cpu_mask >> shift; | |
917 | } | |
918 | ||
919 | *ctr_out = ctr; | |
920 | return 0; | |
921 | } | |
922 | ||
923 | /* | |
924 | * Support for monotonic's mtd_reset function. | |
925 | */ | |
926 | ||
927 | /* | |
928 | * Reset all configuration and disable the counters if they're currently | |
929 | * counting. | |
930 | */ | |
931 | static void | |
932 | uncore_reset(void) | |
933 | { | |
934 | mt_uncore_enabled = false; | |
935 | ||
936 | unsigned int curmonid = uncmon_get_curid(); | |
937 | ||
938 | for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) { | |
939 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
940 | bool remote = monid != curmonid; | |
941 | ||
942 | int intrs_en = uncmon_lock(mon); | |
943 | if (remote) { | |
944 | #if UNCORE_PER_CLUSTER | |
945 | uncmon_set_counting_locked_r(monid, 0); | |
946 | #endif /* UNCORE_PER_CLUSTER */ | |
947 | } else { | |
948 | uncmon_set_counting_locked_l(monid, 0); | |
949 | } | |
950 | ||
951 | for (int ctr = 0; ctr < UNCORE_NCTRS; ctr++) { | |
952 | if (uncore_active_ctrs & (1U << ctr)) { | |
953 | if (remote) { | |
954 | #if UNCORE_PER_CLUSTER | |
955 | uncmon_write_counter_locked_r(monid, ctr, 0); | |
956 | #endif /* UNCORE_PER_CLUSTER */ | |
957 | } else { | |
958 | uncmon_write_counter_locked_l(monid, ctr, 0); | |
959 | } | |
960 | } | |
961 | } | |
962 | ||
963 | memset(&mon->um_snaps, 0, sizeof(mon->um_snaps)); | |
964 | memset(&mon->um_counts, 0, sizeof(mon->um_counts)); | |
965 | if (remote) { | |
966 | #if UNCORE_PER_CLUSTER | |
967 | uncmon_clear_int_locked_r(monid); | |
968 | #endif /* UNCORE_PER_CLUSTER */ | |
969 | } else { | |
970 | uncmon_clear_int_locked_l(monid); | |
971 | } | |
972 | ||
973 | uncmon_unlock(mon, intrs_en); | |
974 | } | |
975 | ||
976 | uncore_active_ctrs = 0; | |
977 | memset(&uncore_config, 0, sizeof(uncore_config)); | |
978 | ||
979 | for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) { | |
980 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
981 | bool remote = monid != curmonid; | |
982 | ||
983 | int intrs_en = uncmon_lock(mon); | |
984 | if (remote) { | |
985 | #if UNCORE_PER_CLUSTER | |
986 | uncmon_program_events_locked_r(monid); | |
987 | #endif /* UNCORE_PER_CLUSTER */ | |
988 | } else { | |
989 | uncmon_program_events_locked_l(monid); | |
990 | } | |
991 | uncmon_unlock(mon, intrs_en); | |
992 | } | |
993 | } | |
994 | ||
995 | /* | |
996 | * Support for monotonic's mtd_enable function. | |
997 | */ | |
998 | ||
999 | static void | |
1000 | uncmon_set_enabled_l(unsigned int monid, bool enable) | |
1001 | { | |
1002 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
1003 | int intrs_en = uncmon_lock(mon); | |
1004 | ||
1005 | if (enable) { | |
1006 | uncmon_program_events_locked_l(monid); | |
1007 | uncmon_set_counting_locked_l(monid, uncore_active_ctrs); | |
1008 | } else { | |
1009 | uncmon_set_counting_locked_l(monid, 0); | |
1010 | } | |
1011 | ||
1012 | uncmon_unlock(mon, intrs_en); | |
1013 | } | |
1014 | ||
1015 | #if UNCORE_PER_CLUSTER | |
1016 | ||
1017 | static void | |
1018 | uncmon_set_enabled_r(unsigned int monid, bool enable) | |
1019 | { | |
1020 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
1021 | int intrs_en = uncmon_lock(mon); | |
1022 | ||
1023 | if (enable) { | |
1024 | uncmon_program_events_locked_r(monid); | |
1025 | uncmon_set_counting_locked_r(monid, uncore_active_ctrs); | |
1026 | } else { | |
1027 | uncmon_set_counting_locked_r(monid, 0); | |
1028 | } | |
1029 | ||
1030 | uncmon_unlock(mon, intrs_en); | |
1031 | } | |
1032 | ||
1033 | #endif /* UNCORE_PER_CLUSTER */ | |
1034 | ||
1035 | static void | |
1036 | uncore_set_enabled(bool enable) | |
1037 | { | |
1038 | mt_uncore_enabled = enable; | |
1039 | ||
1040 | unsigned int curmonid = uncmon_get_curid(); | |
1041 | for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) { | |
1042 | if (monid != curmonid) { | |
1043 | #if UNCORE_PER_CLUSTER | |
1044 | uncmon_set_enabled_r(monid, enable); | |
1045 | #endif /* UNCORE_PER_CLUSTER */ | |
1046 | } else { | |
1047 | uncmon_set_enabled_l(monid, enable); | |
1048 | } | |
1049 | } | |
1050 | } | |
1051 | ||
1052 | /* | |
1053 | * Hooks in the machine layer. | |
1054 | */ | |
1055 | ||
1056 | static void | |
1057 | uncore_fiq(uint64_t upmsr) | |
1058 | { | |
1059 | /* | |
1060 | * Determine which counters overflowed. | |
1061 | */ | |
1062 | uint64_t disable_ctr_mask = (upmsr & UPMSR_OVF_MASK) >> UPMSR_OVF_POS; | |
1063 | /* should not receive interrupts from inactive counters */ | |
1064 | assert(!(disable_ctr_mask & ~uncore_active_ctrs)); | |
1065 | ||
1066 | unsigned int monid = uncmon_get_curid(); | |
1067 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
1068 | ||
1069 | int intrs_en = uncmon_lock(mon); | |
1070 | ||
1071 | /* | |
1072 | * Disable any counters that overflowed. | |
1073 | */ | |
1074 | uncmon_set_counting_locked_l(monid, | |
1075 | uncore_active_ctrs & ~disable_ctr_mask); | |
1076 | ||
1077 | /* | |
1078 | * With the overflowing counters disabled, capture their counts and reset | |
1079 | * the UPMCs and their snapshots to 0. | |
1080 | */ | |
1081 | for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) { | |
1082 | if (UPMSR_OVF(upmsr, ctr)) { | |
1083 | uncmon_update_locked(monid, monid, ctr); | |
1084 | mon->um_snaps[ctr] = 0; | |
1085 | uncmon_write_counter_locked_l(monid, ctr, 0); | |
1086 | } | |
1087 | } | |
1088 | ||
1089 | /* | |
1090 | * Acknowledge the interrupt, now that any overflowed PMCs have been reset. | |
1091 | */ | |
1092 | uncmon_clear_int_locked_l(monid); | |
1093 | ||
1094 | /* | |
1095 | * Re-enable all active counters. | |
1096 | */ | |
1097 | uncmon_set_counting_locked_l(monid, uncore_active_ctrs); | |
1098 | ||
1099 | uncmon_unlock(mon, intrs_en); | |
1100 | } | |
1101 | ||
1102 | static void | |
1103 | uncore_save(void) | |
1104 | { | |
1105 | if (!uncore_active_ctrs) { | |
1106 | return; | |
1107 | } | |
1108 | ||
1109 | unsigned int curmonid = uncmon_get_curid(); | |
1110 | ||
1111 | for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) { | |
1112 | struct uncore_monitor *mon = &uncore_monitors[monid]; | |
1113 | int intrs_en = uncmon_lock(mon); | |
1114 | ||
1115 | if (mt_uncore_enabled) { | |
1116 | if (monid != curmonid) { | |
1117 | #if UNCORE_PER_CLUSTER | |
1118 | uncmon_set_counting_locked_r(monid, 0); | |
1119 | #endif /* UNCORE_PER_CLUSTER */ | |
1120 | } else { | |
1121 | uncmon_set_counting_locked_l(monid, 0); | |
1122 | } | |
1123 | } | |
1124 | ||
1125 | for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) { | |
1126 | if (uncore_active_ctrs & (1U << ctr)) { | |
1127 | uncmon_update_locked(monid, curmonid, ctr); | |
1128 | } | |
1129 | } | |
1130 | ||
1131 | mon->um_sleeping = true; | |
1132 | uncmon_unlock(mon, intrs_en); | |
1133 | } | |
1134 | } | |
1135 | ||
1136 | static void | |
1137 | uncore_restore(void) | |
1138 | { | |
1139 | if (!uncore_active_ctrs) { | |
1140 | return; | |
1141 | } | |
1142 | unsigned int curmonid = uncmon_get_curid(); | |
1143 | ||
1144 | struct uncore_monitor *mon = &uncore_monitors[curmonid]; | |
1145 | int intrs_en = uncmon_lock(mon); | |
1146 | if (!mon->um_sleeping) { | |
1147 | goto out; | |
1148 | } | |
1149 | ||
1150 | for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) { | |
1151 | if (uncore_active_ctrs & (1U << ctr)) { | |
1152 | uncmon_write_counter_locked_l(curmonid, ctr, mon->um_snaps[ctr]); | |
1153 | } | |
1154 | } | |
1155 | uncmon_program_events_locked_l(curmonid); | |
1156 | uncmon_init_locked_l(curmonid); | |
1157 | mon->um_sleeping = false; | |
1158 | ||
1159 | out: | |
1160 | uncmon_unlock(mon, intrs_en); | |
1161 | } | |
1162 | ||
1163 | static void | |
1164 | uncore_early_init(void) | |
1165 | { | |
1166 | #if UNCORE_PER_CLUSTER | |
1167 | /* | |
1168 | * Initialize the necessary PIO physical regions from the device tree. | |
1169 | */ | |
1170 | DTEntry armio_entry = NULL; | |
1171 | if ((DTFindEntry("name", "arm-io", &armio_entry) != kSuccess)) { | |
1172 | panic("unable to find arm-io DT entry"); | |
1173 | } | |
1174 | ||
1175 | uint64_t *regs; | |
1176 | unsigned int regs_size = 0; | |
1177 | if (DTGetProperty(armio_entry, "acc-impl", (void **)®s, ®s_size) != | |
1178 | kSuccess) { | |
1179 | panic("unable to find acc-impl DT property"); | |
1180 | } | |
1181 | /* | |
1182 | * Two 8-byte values are expected for each cluster -- the physical address | |
1183 | * of the region and its size. | |
1184 | */ | |
1185 | const unsigned int expected_size = | |
1186 | (typeof(expected_size))sizeof(uint64_t) * __ARM_CLUSTER_COUNT__ * 2; | |
1187 | if (regs_size != expected_size) { | |
1188 | panic("invalid size for acc-impl DT property"); | |
1189 | } | |
1190 | for (int i = 0; i < __ARM_CLUSTER_COUNT__; i++) { | |
1191 | acc_impl_phys[i] = regs[i * 2]; | |
1192 | } | |
1193 | acc_impl_size = regs[1]; | |
1194 | ||
1195 | regs_size = 0; | |
1196 | if (DTGetProperty(armio_entry, "cpm-impl", (void **)®s, ®s_size) != | |
1197 | kSuccess) { | |
1198 | panic("unable to find cpm-impl property"); | |
1199 | } | |
1200 | if (regs_size != expected_size) { | |
1201 | panic("invalid size for cpm-impl DT property"); | |
1202 | } | |
1203 | for (int i = 0; i < __ARM_CLUSTER_COUNT__; i++) { | |
1204 | cpm_impl_phys[i] = regs[i * 2]; | |
1205 | } | |
1206 | cpm_impl_size = regs[1]; | |
1207 | #endif /* UNCORE_PER_CLUSTER */ | |
1208 | } | |
1209 | ||
1210 | #endif /* HAS_UNCORE_CTRS */ | |
1211 | |
1212 | #pragma mark common hooks | |
1213 | ||
1214 | void |
1215 | mt_early_init(void) | |
1216 | { | |
1217 | #if HAS_UNCORE_CTRS |
1218 | uncore_early_init(); | |
1219 | #endif /* HAS_UNCORE_CTRS */ | |
1220 | } |
1221 | ||
1222 | void |
1223 | mt_cpu_idle(cpu_data_t *cpu) | |
1224 | { | |
1225 | core_idle(cpu); | |
1226 | } | |
1227 | ||
1228 | void | |
1229 | mt_cpu_run(cpu_data_t *cpu) | |
5ba3f43e | 1230 | { |
1231 | struct mt_cpu *mtc; |
1232 | ||
1233 | assert(cpu != NULL); | |
1234 | assert(ml_get_interrupts_enabled() == FALSE); | |
1235 | ||
1236 | mtc = &cpu->cpu_monotonic; | |
1237 | ||
1238 | for (int i = 0; i < MT_CORE_NFIXED; i++) { | |
1239 | mt_core_set_snap(i, mtc->mtc_snaps[i]); | |
1240 | } | |
1241 | ||
1242 | /* re-enable the counters */ | |
1243 | core_init_execution_modes(); | |
1244 | ||
0a7de745 | 1245 | core_set_enabled(); |
5ba3f43e A |
1246 | } |
1247 | ||
1248 | void |
1249 | mt_cpu_down(cpu_data_t *cpu) | |
1250 | { | |
1251 | mt_cpu_idle(cpu); | |
1252 | } | |
1253 | ||
1254 | void | |
1255 | mt_cpu_up(cpu_data_t *cpu) | |
1256 | { | |
1257 | mt_cpu_run(cpu); |
1258 | } | |
1259 | ||
1260 | void | |
1261 | mt_sleep(void) | |
1262 | { | |
1263 | #if HAS_UNCORE_CTRS |
1264 | uncore_save(); | |
1265 | #endif /* HAS_UNCORE_CTRS */ | |
1266 | } |
1267 | ||
1268 | void | |
d9a64523 | 1269 | mt_wake_per_core(void) |
5ba3f43e | 1270 | { |
1271 | #if HAS_UNCORE_CTRS |
1272 | if (mt_uncore_initted) { | |
1273 | uncore_restore(); | |
1274 | } | |
1275 | #endif /* HAS_UNCORE_CTRS */ | |
1276 | } |
1277 | ||
1278 | uint64_t |
1279 | mt_count_pmis(void) | |
1280 | { | |
1281 | uint64_t npmis = 0; | |
1282 | int max_cpu = ml_get_max_cpu_number(); | |
1283 | for (int i = 0; i <= max_cpu; i++) { | |
1284 | cpu_data_t *cpu = (cpu_data_t *)CpuDataEntries[i].cpu_data_vaddr; | |
1285 | npmis += cpu->cpu_monotonic.mtc_npmis; | |
1286 | } | |
1287 | return npmis; | |
1288 | } | |
1289 | ||
d9a64523 | 1290 | static void |
0a7de745 | 1291 | mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0) |
5ba3f43e | 1292 | { |
1293 | assert(cpu != NULL); |
1294 | assert(ml_get_interrupts_enabled() == FALSE); | |
1295 | ||
1296 | cpu->cpu_monotonic.mtc_npmis += 1; |
1297 | cpu->cpu_stat.pmi_cnt_wake += 1; | |
1298 | |
1299 | #if MONOTONIC_DEBUG | |
1300 | if (!PMCR0_PMI(pmcr0)) { | |
1301 | kprintf("monotonic: mt_cpu_pmi but no PMI (PMCR0 = %#llx)\n", | |
1302 | pmcr0); | |
1303 | } | |
1304 | #else /* MONOTONIC_DEBUG */ | |
1305 | #pragma unused(pmcr0) | |
1306 | #endif /* !MONOTONIC_DEBUG */ | |
1307 | ||
1308 | uint64_t pmsr = __builtin_arm_rsr64(PMSR); | |
1309 | ||
1310 | #if MONOTONIC_DEBUG | |
1311 | kprintf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx", | |
1312 | cpu_number(), pmsr, pmcr0); | |
1313 | #endif /* MONOTONIC_DEBUG */ | |
5ba3f43e | 1314 | |
1315 | /* |
1316 | * monotonic handles any fixed counter PMIs. | |
1317 | */ | |
1318 | for (unsigned int i = 0; i < MT_CORE_NFIXED; i++) { | |
1319 | if ((pmsr & PMSR_OVF(i)) == 0) { | |
1320 | continue; | |
1321 | } | |
1322 | ||
1323 | uint64_t count = mt_cpu_update_count(cpu, i); | |
1324 | cpu->cpu_monotonic.mtc_counts[i] += count; | |
1325 | mt_core_set_snap(i, mt_core_reset_values[i]); | |
1326 | cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i]; | |
1327 | ||
1328 | if (mt_microstackshots && mt_microstackshot_ctr == i) { | |
1329 | bool user_mode = false; | |
1330 | arm_saved_state_t *state = get_user_regs(current_thread()); | |
1331 | if (state) { | |
1332 | user_mode = PSR64_IS_USER(get_saved_state_cpsr(state)); | |
1333 | } | |
1334 | KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 1), | |
0a7de745 | 1335 | mt_microstackshot_ctr, user_mode); |
1336 | mt_microstackshot_pmi_handler(user_mode, mt_microstackshot_ctx); |
1337 | } | |
1338 | } | |
1339 | ||
1340 | /* | |
1341 | * KPC handles the configurable counter PMIs. | |
1342 | */ | |
1343 | for (unsigned int i = MT_CORE_NFIXED; i < CORE_NCTRS; i++) { | |
5ba3f43e | 1344 | if (pmsr & PMSR_OVF(i)) { |
1345 | extern void kpc_pmi_handler(unsigned int ctr); |
1346 | kpc_pmi_handler(i); | |
1347 | } |
1348 | } | |
1349 | ||
1350 | #if MACH_ASSERT |
1351 | pmsr = __builtin_arm_rsr64(PMSR); | |
1352 | assert(pmsr == 0); | |
1353 | #endif /* MACH_ASSERT */ | |
1354 | ||
1355 | core_set_enabled(); |
1356 | } | |
1357 | ||
0a7de745 | 1358 | #if CPMU_AIC_PMI |
5ba3f43e | 1359 | void |
0a7de745 | 1360 | mt_cpmu_aic_pmi(cpu_id_t source) |
d9a64523 | 1361 | { |
1362 | struct cpu_data *curcpu = getCpuDatap(); |
1363 | if (source != curcpu->interrupt_nub) { | |
1364 | panic("monotonic: PMI from IOCPU %p delivered to %p", source, | |
1365 | curcpu->interrupt_nub); | |
1366 | } | |
1367 | mt_cpu_pmi(curcpu, __builtin_arm_rsr64(PMCR0)); | |
1368 | } | |
1369 | #endif /* CPMU_AIC_PMI */ | |
1370 | ||
1371 | void | |
1372 | mt_fiq(void *cpu, uint64_t pmcr0, uint64_t upmsr) | |
1373 | { | |
1374 | #if CPMU_AIC_PMI | |
1375 | #pragma unused(cpu, pmcr0) | |
1376 | #else /* CPMU_AIC_PMI */ | |
1377 | mt_cpu_pmi(cpu, pmcr0); | |
1378 | #endif /* !CPMU_AIC_PMI */ | |
d9a64523 | 1379 | |
1380 | #if HAS_UNCORE_CTRS |
1381 | uncore_fiq(upmsr); | |
1382 | #else /* HAS_UNCORE_CTRS */ | |
d9a64523 | 1383 | #pragma unused(upmsr) |
c6bf4f31 | 1384 | #endif /* !HAS_UNCORE_CTRS */ |
1385 | } |
1386 | ||
1387 | static uint32_t mt_xc_sync; | |
1388 | ||
1389 | static void | |
1390 | mt_microstackshot_start_remote(__unused void *arg) | |
1391 | { | |
1392 | cpu_data_t *cpu = getCpuDatap(); | |
1393 | ||
1394 | __builtin_arm_wsr64(PMCR0, PMCR0_INIT); | |
1395 | ||
1396 | for (int i = 0; i < MT_CORE_NFIXED; i++) { | |
1397 | uint64_t count = mt_cpu_update_count(cpu, i); | |
1398 | cpu->cpu_monotonic.mtc_counts[i] += count; | |
1399 | mt_core_set_snap(i, mt_core_reset_values[i]); | |
1400 | cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i]; | |
1401 | } | |
1402 | ||
1403 | core_set_enabled(); | |
1404 | ||
cb323159 | 1405 | if (os_atomic_dec(&mt_xc_sync, relaxed) == 0) { |
1406 | thread_wakeup((event_t)&mt_xc_sync); |
1407 | } | |
1408 | } | |
1409 | ||
1410 | int | |
1411 | mt_microstackshot_start_arch(uint64_t period) | |
5ba3f43e | 1412 | { |
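/*
 * Seed the counter with CTR_MAX - period so it overflows -- and raises a
 * PMI -- after roughly `period` more events.  For example, a period of
 * 1000000 events starts the counter at 0x00007ffffff0bdbf.
 */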
1413 | uint64_t reset_value = 0; |
1414 | int ovf = os_sub_overflow(CTR_MAX, period, &reset_value); | |
1415 | if (ovf) { | |
1416 | return ERANGE; | |
1417 | } | |
1418 | ||
1419 | mt_core_reset_values[mt_microstackshot_ctr] = reset_value; | |
d9a64523 | 1420 | cpu_broadcast_xcall(&mt_xc_sync, TRUE, mt_microstackshot_start_remote, |
0a7de745 | 1421 | mt_microstackshot_start_remote /* cannot pass NULL */); |
d9a64523 | 1422 | return 0; |
1423 | } |
1424 | ||
1425 | #pragma mark dev nodes | |
1426 | ||
d9a64523 | 1427 | struct mt_device mt_devices[] = { |
5ba3f43e | 1428 | [0] = { |
d9a64523 | 1429 | .mtd_name = "core", |
1430 | .mtd_init = core_init, |
1431 | }, | |
1432 | #if HAS_UNCORE_CTRS |
1433 | [1] = { | |
1434 | .mtd_name = "uncore", | |
1435 | .mtd_init = uncore_init, | |
1436 | .mtd_add = uncore_add, | |
1437 | .mtd_reset = uncore_reset, | |
1438 | .mtd_enable = uncore_set_enabled, | |
1439 | .mtd_read = uncore_read, | |
1440 | ||
1441 | .mtd_nmonitors = UNCORE_NMONITORS, | |
1442 | .mtd_ncounters = UNCORE_NCTRS, | |
1443 | } | |
1444 | #endif /* HAS_UNCORE_CTRS */ | |
1445 | }; |
1446 | ||
1447 | static_assert( | |
1448 | (sizeof(mt_devices) / sizeof(mt_devices[0])) == MT_NDEVS, |
1449 | "MT_NDEVS macro should be same as the length of mt_devices"); |