apple/xnu (xnu-2050.7.9) - osfmk/pmc/pmc.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <kern/kalloc.h>
25 #include <kern/kern_types.h>
26 #include <kern/locks.h>
27 #include <kern/misc_protos.h>
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/zalloc.h>
31 #include <machine/machine_cpu.h>
32
33 #include <pmc/pmc.h>
34
35 #include <libkern/OSAtomic.h>
36
37 #if defined(__i386__) || defined(__x86_64__)
38 #include <i386/mp.h>
39 #endif
40
41 #if CONFIG_COUNTERS
42
43 /* enables various debug logging */
44 #undef DEBUG_COUNTERS
45
46 typedef uint8_t pmc_state_event_t;
47
48 #define PMC_STATE_EVENT_START 0
49 #define PMC_STATE_EVENT_STOP 1
50 #define PMC_STATE_EVENT_FREE 2
51 #define PMC_STATE_EVENT_INTERRUPT 3
52 #define PMC_STATE_EVENT_END_OF_INTERRUPT 4
53 #define PMC_STATE_EVENT_CONTEXT_IN 5
54 #define PMC_STATE_EVENT_CONTEXT_OUT 6
55 #define PMC_STATE_EVENT_LOAD_FINISHED 7
56 #define PMC_STATE_EVENT_STORE_FINISHED 8
57
58 /* PMC spin timeouts */
59 #define PMC_SPIN_THRESHOLD 10 /* Number of spins to allow before checking mach_absolute_time() */
60 #define PMC_SPIN_TIMEOUT_US 10 /* Time in microseconds before the spin causes an assert */
61
62 uint64_t pmc_spin_timeout_count = 0; /* Number of times where a PMC spin loop causes a timeout */
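
/*
 * A minimal sketch (not part of the build) of how these thresholds are meant
 * to be used, mirroring the spin loop in pmc_reservation_interrupt() below;
 * condition_is_met() stands in for whatever state is being waited on:
 *
 *	uint64_t timeout;
 *	uint32_t spins = 0;
 *
 *	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
 *	timeout += mach_absolute_time();
 *
 *	while (!condition_is_met()) {
 *		// Spin cheaply a few times before consulting the timebase at all
 *		if (++spins > PMC_SPIN_THRESHOLD && mach_absolute_time() > timeout) {
 *			pmc_spin_timeout_count++;
 *			assert(0);
 *		}
 *		cpu_pause();
 *	}
 */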
63
64 #ifdef DEBUG_COUNTERS
65 # include <pexpert/pexpert.h>
66 # define COUNTER_DEBUG(...) \
67 do { \
68 kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
69 kprintf(__VA_ARGS__); \
70 } while(0)
71
72 # define PRINT_PERF_MON(x) \
73 do { \
74 kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
75 x, x->object, x->useCount, \
76 (x->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING) ? \
77 1 : 0); \
78 } while(0)
79
80 static const char * pmc_state_state_name(pmc_state_t state) {
81 switch (PMC_STATE_STATE(state)) {
82 case PMC_STATE_STATE_INVALID:
83 return "INVALID";
84 case PMC_STATE_STATE_STOP:
85 return "STOP";
86 case PMC_STATE_STATE_CAN_RUN:
87 return "CAN_RUN";
88 case PMC_STATE_STATE_LOAD:
89 return "LOAD";
90 case PMC_STATE_STATE_RUN:
91 return "RUN";
92 case PMC_STATE_STATE_STORE:
93 return "STORE";
94 case PMC_STATE_STATE_INTERRUPT:
95 return "INTERRUPT";
96 case PMC_STATE_STATE_DEALLOC:
97 return "DEALLOC";
98 default:
99 return "UNKNOWN";
100 }
101 }
102
103 static const char * pmc_state_event_name(pmc_state_event_t event) {
104 switch (event) {
105 case PMC_STATE_EVENT_START:
106 return "START";
107 case PMC_STATE_EVENT_STOP:
108 return "STOP";
109 case PMC_STATE_EVENT_FREE:
110 return "FREE";
111 case PMC_STATE_EVENT_INTERRUPT:
112 return "INTERRUPT";
113 case PMC_STATE_EVENT_END_OF_INTERRUPT:
114 return "END OF INTERRUPT";
115 case PMC_STATE_EVENT_CONTEXT_IN:
116 return "CONTEXT IN";
117 case PMC_STATE_EVENT_CONTEXT_OUT:
118 return "CONTEXT OUT";
119 case PMC_STATE_EVENT_LOAD_FINISHED:
120 return "LOAD_FINISHED";
121 case PMC_STATE_EVENT_STORE_FINISHED:
122 return "STORE_FINISHED";
123 default:
124 return "UNKNOWN";
125 }
126 }
127
128 # define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
129 # define PMC_STATE_ARGS(x) pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
130 ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
131 #else
132 # define COUNTER_DEBUG(...)
133 # define PRINT_PERF_MON(x)
134 # define PMC_STATE_FORMAT
135 # define PMC_STATE_ARGS(x)
136 #endif
137
138 /*!struct
139 * pmc_config is the data behind a pmc_config_t.
140 * @member object A pointer to an instance of IOPerformanceCounterConfiguration
141 * @member method A pointer to a method to call to handle PMI.
142 * @member interrupt_after_value Cause a PMI after the counter counts this many
143 * events.
144 * @member refCon Passed to the @method method as the refCon argument.
145 */
146 struct pmc_config {
147 pmc_config_object_t object;
148 volatile pmc_interrupt_method_t method;
149 uint64_t interrupt_after_value;
150 void *refCon;
151 };
152
153 /*
154 * Allocation Zones
155 *
156 * Two allocation zones - Perf zone small and Perf zone big.
157 * Each zone has associated maximums, defined below.
158 * The small zone is the max of the smallest allocation objects (all sizes on
159 * K64):
160 * perf_monitor_t - 48 bytes
161 * perf_monitor_methods_t - 28 bytes
162 * pmc_reservation_t - 48 bytes
163 * pmc_config_t - 32 bytes
164 * perf_small_zone unit size is (on K64) 48 bytes
165 * perf_small_zone max count must be max number of perf monitors, plus (max
166 * number of reservations * 2). The "*2" is because each reservation has a
167 * pmc_config_t within.
168 *
169 * Big zone is max of the larger allocation units
170 * pmc_t - 144 bytes
171 * pmc_methods_t - 116 bytes
172 * perf_big_zone unit size is (on K64) 144 bytes
173 * perf_big_zone max count is the max number of PMCs we support.
174 */
175
176 static zone_t perf_small_zone = NULL;
177 #define MAX_PERF_SMALLS (256 + 8196 + 8196)
178 #define PERF_SMALL_UNIT_SZ (MAX(MAX(sizeof(struct perf_monitor), \
179 sizeof(struct pmc_reservation)), sizeof(struct pmc_config)))
180
181 static zone_t perf_big_zone = NULL;
182 #define MAX_PERF_BIGS (1024)
183 #define PERF_BIG_UNIT_SZ (sizeof(struct pmc))
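
/*
 * Worked example of the resulting zone budgets, using the K64 element sizes
 * noted above (approximate, since the real unit sizes come from sizeof() at
 * build time):
 *
 *	MAX_PERF_SMALLS = 256 + 8196 + 8196 = 16648 elements
 *	small zone max  = 16648 * 48 bytes  ~= 780 KiB
 *
 *	MAX_PERF_BIGS   = 1024 elements
 *	big zone max    = 1024 * 144 bytes  = 144 KiB
 */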
184
185 /*
186 * Locks and Lock groups
187 */
188 static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL;
189 static lck_grp_attr_t *pmc_lock_grp_attr;
190 static lck_attr_t *pmc_lock_attr;
191
192 /* PMC tracking queue locks */
193
194 static lck_mtx_t cpu_monitor_queue_mutex; /* protects per-cpu queues at initialisation time */
195 static lck_spin_t perf_monitor_queue_spin; /* protects adding and removing from queue */
196 static lck_spin_t perf_counters_queue_spin; /* protects adding and removing from queue */
197
198 /* Reservation tracking queues lock */
199 static lck_spin_t reservations_spin;
200
201 /*
202 * Tracking queues
203 *
204 * Keeps track of registered perf monitors and perf counters
205 */
206
207 static queue_head_t **cpu_monitor_queues = NULL;
208
209 static queue_head_t *perf_monitors_queue = NULL;
210 static volatile uint32_t perf_monitors_count = 0U;
211
212 static queue_head_t *perf_counters_queue = NULL;
213 static volatile uint32_t perf_counters_count = 0U;
214
215 /*
216 * Reservation queues
217 *
218 * Keeps track of all system, task, and thread-level reservations (both active and
219 * inactive).
220 *
221 * We track them all here (rather than in their respective task or thread only)
222 * so that we can inspect our tracking data directly (rather than peeking at
223 * every task and thread) to determine if/when a new reservation would
224 * constitute a conflict.
225 */
226
227 static queue_head_t *system_reservations = NULL;
228 static volatile uint32_t system_reservation_count = 0U;
229
230 static queue_head_t *task_reservations = NULL;
231 static volatile uint32_t task_reservation_count = 0U;
232
233 static queue_head_t *thread_reservations = NULL;
234 static volatile uint32_t thread_reservation_count = 0U;
235
236 #if XNU_KERNEL_PRIVATE
237
238 /*
239 * init_pmc_locks creates and initializes all the locks and lock groups and lock
240 * attributes required for the pmc sub-system.
241 */
242 static void init_pmc_locks(void) {
243 pmc_lock_attr = lck_attr_alloc_init();
244 assert(pmc_lock_attr);
245
246 pmc_lock_grp_attr = lck_grp_attr_alloc_init();
247 assert(pmc_lock_grp_attr);
248
249 pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
250 assert(pmc_lock_grp);
251
252 lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
253 lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);
254
255 lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);
256
257 lck_mtx_init(&cpu_monitor_queue_mutex, pmc_lock_grp, pmc_lock_attr);
258 }
259
260 /*
261 * init_pmc_zones initializes the allocation zones used by the pmc subsystem
262 */
263 static void init_pmc_zones(void) {
264 perf_small_zone = zinit(PERF_SMALL_UNIT_SZ,
265 MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS,
266 "pmc.small zone");
267
268 assert(perf_small_zone);
269
270 perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
271 MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS,
272 "pmc.big zone");
273
274 assert(perf_big_zone);
275 }
276
277 /*
278 * init_pmc_queues allocates and initializes the tracking queues for
279 * registering and reserving individual pmcs and perf monitors.
280 */
281 static void init_pmc_queues(void) {
282
283 perf_monitors_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
284 assert(perf_monitors_queue);
285
286 queue_init(perf_monitors_queue);
287
288 perf_counters_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
289 assert(perf_counters_queue);
290
291 queue_init(perf_counters_queue);
292
293 system_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
294 assert(system_reservations);
295
296 queue_init(system_reservations);
297
298 task_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
299 assert(task_reservations);
300
301 queue_init(task_reservations);
302
303 thread_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
304 assert(thread_reservations);
305
306 queue_init(thread_reservations);
307 }
308
309 /*
310 * pmc_bootstrap brings up all the necessary infrastructure required to use the
311 * pmc sub-system.
312 */
313 __private_extern__
314 void pmc_bootstrap(void) {
315 /* build our alloc zones */
316 init_pmc_zones();
317
318 /* build the locks */
319 init_pmc_locks();
320
321 /* build our tracking queues */
322 init_pmc_queues();
323 }
324
325 #endif /* XNU_KERNEL_PRIVATE */
326
327 /*
328 * Perf Monitor Internals
329 */
330
331 static perf_monitor_t perf_monitor_alloc(void) {
332 /* perf monitors come from the perf small zone */
333 return (perf_monitor_t)zalloc(perf_small_zone);
334 }
335
336 static void perf_monitor_free(void *pm) {
337 zfree(perf_small_zone, pm);
338 }
339
340 static void perf_monitor_init(perf_monitor_t pm, int cpu) {
341 assert(pm);
342
343 pm->object = NULL;
344
345 bzero(&(pm->methods), sizeof(perf_monitor_methods_t));
346
347 pm->useCount = 1; /* initial retain count of 1, for caller */
348
349 pm->reservedCounters = 0;
350
351 pm->cpu = cpu;
352
353 pm->link.next = pm->link.prev = (queue_entry_t)NULL;
354 pm->cpu_link.next = pm->cpu_link.prev = (queue_entry_t)NULL;
355 }
356
357 /*
358 * perf_monitor_dequeue removes the given perf_monitor_t from the
359 * perf_monitor_queue, thereby unregistering it with the system.
360 */
361 static void perf_monitor_dequeue(perf_monitor_t pm) {
362 lck_spin_lock(&perf_monitor_queue_spin);
363
364 if (pm->methods.flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
365 /* If this flag is set, the monitor is already validated to be
366 * accessible from a single cpu only.
367 */
368 queue_remove(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link);
369 }
370
371 /*
372 * remove the @pm object from the @perf_monitor_queue queue (it is of type
373 * <perf_monitor_t> and has a field called @link that is the queue_link_t).
374 */
375 queue_remove(perf_monitors_queue, pm, perf_monitor_t, link);
376
377 perf_monitors_count--;
378
379 lck_spin_unlock(&perf_monitor_queue_spin);
380 }
381
382 /*
383 * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue,
384 * thereby registering it for use with the system.
385 */
386 static void perf_monitor_enqueue(perf_monitor_t pm) {
387
388 lck_mtx_lock(&cpu_monitor_queue_mutex);
389 lck_spin_lock(&perf_monitor_queue_spin);
390
391 if (pm->cpu >= 0) {
392 /* Deferred initialisation; saves memory and permits ml_get_max_cpus()
393 * to block until cpu initialisation is complete.
394 */
395 if (!cpu_monitor_queues) {
396 uint32_t max_cpus;
397 queue_head_t **queues;
398 uint32_t i;
399
400 lck_spin_unlock(&perf_monitor_queue_spin);
401
402 max_cpus = ml_get_max_cpus();
403
404 queues = (queue_head_t**)kalloc(sizeof(queue_head_t*) * max_cpus);
405 assert(queues);
406 for (i = 0; i < max_cpus; i++) {
407 queue_head_t *queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
408 assert(queue);
409 queue_init(queue);
410 queues[i] = queue;
411 }
412
413 lck_spin_lock(&perf_monitor_queue_spin);
414
415 cpu_monitor_queues = queues;
416 }
417
418 queue_enter(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link);
419 }
420
421 queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
422 perf_monitors_count++;
423
424 lck_spin_unlock(&perf_monitor_queue_spin);
425 lck_mtx_unlock(&cpu_monitor_queue_mutex);
426 }
427
428 /*
429 * perf_monitor_reference increments the reference count for the given
430 * perf_monitor_t.
431 */
432 static void perf_monitor_reference(perf_monitor_t pm) {
433 assert(pm);
434
435 OSIncrementAtomic(&(pm->useCount));
436 }
437
438 /*
439 * perf_monitor_deallocate decrements the reference count for the given
440 * perf_monitor_t. If the reference count hits 0, the object is released back
441 * to the perf_small_zone via a call to perf_monitor_free().
442 */
443 static void perf_monitor_deallocate(perf_monitor_t pm) {
444 assert(pm);
445
446 /* If we just removed the last reference count */
447 if(1 == OSDecrementAtomic(&(pm->useCount))) {
448 /* Free the object */
449 perf_monitor_free(pm);
450 }
451 }
452
453 /*
454 * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the
455 * given C++ object pointer that was used when registering with the subsystem.
456 *
457 * If found, the method returns the perf_monitor_t with an extra reference
458 * placed on the object (or NULL if not
459 * found).
460 *
461 * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after
462 * calling perf_monitor_find.
463 */
464 static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
465 assert(monitor);
466 perf_monitor_t element = NULL;
467 perf_monitor_t found = NULL;
468
469 lck_spin_lock(&perf_monitor_queue_spin);
470
471 queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
472 if(element->object == monitor) {
473 perf_monitor_reference(element);
474 found = element;
475 break;
476 }
477 }
478
479 lck_spin_unlock(&perf_monitor_queue_spin);
480
481 return found;
482 }
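
/*
 * Illustrative use of the find/deallocate pairing described above (a sketch,
 * not part of the build): the extra reference taken by perf_monitor_find()
 * must always be dropped by the caller once it is done with the monitor.
 *
 *	perf_monitor_t pm = perf_monitor_find(monitor);
 *	if (pm) {
 *		// ... use pm ...
 *		perf_monitor_deallocate(pm);
 *	}
 */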
483
484 /*
485 * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
486 * associated with.
487 */
488
489 static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
490 assert(pm);
491 assert(pmc);
492
493 /* Today, we merely add a reference count now that a new pmc is attached */
494 perf_monitor_reference(pm);
495 }
496
497 /*
498 * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf
499 * monitor it is associated with.
500 */
501 static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
502 assert(pm);
503 assert(pmc);
504
505 /* Today, we merely remove a reference count now that the pmc is detached */
506 perf_monitor_deallocate(pm);
507 }
508
509 /*
510 * Perf Counter internals
511 */
512
513 static pmc_t pmc_alloc(void) {
514 return (pmc_t)zalloc(perf_big_zone);
515 }
516
517 static void pmc_free(void *pmc) {
518 zfree(perf_big_zone, pmc);
519 }
520
521 /*
522 * pmc_init initializes a newly allocated pmc_t
523 */
524 static void pmc_init(pmc_t pmc) {
525 assert(pmc);
526
527 pmc->object = NULL;
528 pmc->monitor = NULL;
529
530 bzero(&pmc->methods, sizeof(pmc_methods_t));
531
532 /* One reference for the caller */
533 pmc->useCount = 1;
534 }
535
536 /*
537 * pmc_reference increments the reference count of the given pmc_t
538 */
539 static void pmc_reference(pmc_t pmc) {
540 assert(pmc);
541
542 OSIncrementAtomic(&(pmc->useCount));
543 }
544
545 /*
546 * pmc_deallocate decrements the reference count of the given pmc_t. If the
547 * reference count hits zero, the given pmc_t is deallocated and released back
548 * to the allocation zone.
549 */
550 static void pmc_deallocate(pmc_t pmc) {
551 assert(pmc);
552
553 /* If we just removed the last reference count */
554 if(1 == OSDecrementAtomic(&(pmc->useCount))) {
555 /* Free the pmc */
556 pmc_free(pmc);
557 }
558 }
559
560 /*
561 * pmc_dequeue removes the given, newly *un*registered pmc from the
562 * perf_counters_queue.
563 */
564 static void pmc_dequeue(pmc_t pmc) {
565 lck_spin_lock(&perf_counters_queue_spin);
566
567 queue_remove(perf_counters_queue, pmc, pmc_t, link);
568
569 perf_counters_count--;
570
571 lck_spin_unlock(&perf_counters_queue_spin);
572 }
573
574 /*
575 * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue
576 */
577 static void pmc_enqueue(pmc_t pmc) {
578 lck_spin_lock(&perf_counters_queue_spin);
579
580 queue_enter(perf_counters_queue, pmc, pmc_t, link);
581
582 perf_counters_count++;
583
584 lck_spin_unlock(&perf_counters_queue_spin);
585 }
586
587 /*
588 * pmc_find attempts to locate a pmc_t that was registered with the given
589 * pmc_object_t pointer. If found, it returns the pmc_t with an extra reference
590 * which must be dropped by the caller by calling pmc_deallocate().
591 */
592 static pmc_t pmc_find(pmc_object_t object) {
593 assert(object);
594
595 lck_spin_lock(&perf_counters_queue_spin);
596
597 pmc_t element = NULL;
598 pmc_t found = NULL;
599
600 queue_iterate(perf_counters_queue, element, pmc_t, link) {
601 if(element->object == object) {
602 pmc_reference(element);
603 found = element;
604 break;
605 }
606 }
607
608 lck_spin_unlock(&perf_counters_queue_spin);
609
610 return found;
611 }
612
613 /*
614 * Config internals
615 */
616
617 /* Allocate a pmc_config_t */
618 static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) {
619 return (pmc_config_t)zalloc(perf_small_zone);
620 }
621
622 /* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */
623 static void pmc_config_free(pmc_t pmc, pmc_config_t config) {
624 assert(pmc);
625 assert(config);
626
627 if(config->object) {
628 pmc->methods.free_config(pmc->object, config->object);
629 config->object = NULL;
630 }
631
632 zfree(perf_small_zone, config);
633 }
634
635 static kern_return_t pmc_open(pmc_t pmc) {
636 assert(pmc);
637 assert(pmc->object);
638 assert(pmc->open_object);
639
640 return pmc->methods.open(pmc->object, pmc->open_object);
641 }
642
643 static kern_return_t pmc_close(pmc_t pmc) {
644 assert(pmc);
645 assert(pmc->object);
646 assert(pmc->open_object);
647
648 return pmc->methods.close(pmc->object, pmc->open_object);
649 }
650
651 /*
652 * Reservation Internals
653 */
654
655 static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc);
656 static void pmc_internal_reservation_store(pmc_reservation_t reservation);
657 static void pmc_internal_reservation_load(pmc_reservation_t reservation);
658
659 static pmc_reservation_t reservation_alloc(void) {
660 /* pmc reservations come from the perf small zone */
661 return (pmc_reservation_t)zalloc(perf_small_zone);
662 }
663
664 /*
665 * reservation_free deallocates and releases all resources associated with the
666 * given pmc_reservation_t. This includes freeing the config used to create the
667 * reservation, decrementing the reference count for the pmc used to create the
668 * reservation, and deallocating the reservation's memory.
669 */
670 static void reservation_free(pmc_reservation_t resv) {
671 /* Free config */
672 if(resv->config) {
673 assert(resv->pmc);
674
675 pmc_free_config(resv->pmc, resv->config);
676
677 resv->config = NULL;
678 }
679
680 /* release PMC */
681 (void)pmc_internal_reservation_set_pmc(resv, NULL);
682
683 /* Free reservation */
684 zfree(perf_small_zone, resv);
685 }
686
687 /*
688 * reservation_init initializes a newly created reservation.
689 */
690 static void reservation_init(pmc_reservation_t resv) {
691 assert(resv);
692
693 resv->pmc = NULL;
694 resv->config = NULL;
695 resv->value = 0ULL;
696
697 resv->flags = 0U;
698 resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
699 resv->active_last_context_in = 0U;
700
701 /*
702 * Since this member is a union, we only need to set either the task
703 * or thread to NULL.
704 */
705 resv->task = TASK_NULL;
706 }
707
708 /*
709 * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If
710 * there was one set already, it is deallocated (reference is dropped) before
711 * the new one is set. This method increases the reference count of the given
712 * pmc_t.
713 *
714 * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of
715 * dropping the reference on any previously set pmc, and setting the reservation
716 * to having no pmc set.
717 */
718 static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) {
719 assert(resv);
720
721 if(resv->pmc) {
722 (void)pmc_close(resv->pmc);
723 pmc_deallocate(resv->pmc);
724 resv->pmc = NULL;
725 }
726
727 resv->pmc = pmc;
728
729 if(resv->pmc) {
730 pmc_reference(resv->pmc);
731 if(KERN_SUCCESS != pmc_open(resv->pmc)) {
732 pmc_deallocate(resv->pmc);
733 resv->pmc = NULL;
734
735 return KERN_FAILURE;
736 }
737 }
738
739 return KERN_SUCCESS;
740 }
741
742 /*
743 * Used to place reservation into one of the system, task, and thread queues
744 * Assumes the queue's spin lock is already held.
745 */
746 static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) {
747 assert(queue);
748 assert(resv);
749
750 queue_enter(queue, resv, pmc_reservation_t, link);
751 }
752
753 static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) {
754 assert(queue);
755 assert(resv);
756
757 queue_remove(queue, resv, pmc_reservation_t, link);
758 }
759
760 /* Returns TRUE if the reservation applies to the current execution context */
761 static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) {
762 boolean_t ret = FALSE;
763 assert(resv);
764
765 if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) {
766 ret = TRUE;
767 } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) {
768 if(current_task() == resv->task) {
769 ret = TRUE;
770 }
771 } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) {
772 if(current_thread() == resv->thread) {
773 ret = TRUE;
774 }
775 }
776
777 return ret;
778 }
779
780 /*
781 * pmc_accessible_core_count returns the number of logical cores that can access
782 * a given @pmc. 0 means every core in the system.
783 */
784 static uint32_t pmc_accessible_core_count(pmc_t pmc) {
785 assert(pmc);
786
787 uint32_t *cores = NULL;
788 size_t coreCt = 0UL;
789
790 if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object,
791 &cores, &coreCt)) {
792 coreCt = 0U;
793 }
794
795 return (uint32_t)coreCt;
796 }
797
798 /* spin lock for the queue must already be held */
799 /*
800 * This method inspects the given queue for an existing reservation that uses the same pmc
801 * as @resv, and returns TRUE if it would conflict with the incoming one (differing scopes,
802 * matching task/thread, or a pmc that is accessible from more than one core).
803 */
804 static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) {
805 assert(queue);
806 assert(resv);
807
808 boolean_t ret = FALSE;
809 pmc_reservation_t tmp = NULL;
810
811 queue_iterate(queue, tmp, pmc_reservation_t, link) {
812 if(tmp->pmc == resv->pmc) {
813 /* PMC matches - make sure scope matches first */
814 switch(PMC_FLAG_SCOPE(tmp->flags)) {
815 case PMC_FLAG_SCOPE_SYSTEM:
816 /*
817 * Found a reservation in system queue with same pmc - always a
818 * conflict.
819 */
820 ret = TRUE;
821 break;
822 case PMC_FLAG_SCOPE_THREAD:
823 /*
824 * Found one in thread queue with the same PMC as the
825 * argument. Only a conflict if argument scope isn't
826 * thread or system, or the threads match.
827 */
828 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) ||
829 (tmp->thread == resv->thread);
830
831 if(!ret) {
832 /*
833 * so far, no conflict - check that the pmc that is
834 * being reserved isn't accessible from more than
835 * one core, if it is, we need to say it's already
836 * taken.
837 */
838 if(1 != pmc_accessible_core_count(tmp->pmc)) {
839 ret = TRUE;
840 }
841 }
842 break;
843 case PMC_FLAG_SCOPE_TASK:
844 /*
845 * Follow similar semantics for task scope.
846 */
847
848 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
849 (tmp->task == resv->task);
850 if(!ret) {
851 /*
852 * so far, no conflict - check that the pmc that is
853 * being reserved isn't accessible from more than
854 * one core, if it is, we need to say it's already
855 * taken.
856 */
857 if(1 != pmc_accessible_core_count(tmp->pmc)) {
858 ret = TRUE;
859 }
860 }
861
862 break;
863 }
864
865 if(ret) break;
866 }
867 }
868
869 return ret;
870 }
871
872 /*
873 * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be
874 * added to its target queue without creating conflicts (target queue is
875 * determined by the reservation's scope flags). Further, this method returns
876 * FALSE if any level contains a reservation for a PMC that can be accessed from
877 * more than just 1 core, and the given reservation also wants the same PMC.
878 */
879 static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) {
880 assert(resv);
881 boolean_t ret = TRUE;
882
883 if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) ||
884 pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) ||
885 pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) {
886 ret = FALSE;
887 }
888
889 return ret;
890 }
891
892 static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) {
893 assert(thread);
894
895 /* See if this thread needs its PMC flag set */
896 pmc_reservation_t tmp = NULL;
897
898 if(!newFlag) {
899 /*
900 * If the parent task just dropped its reservation, iterate the thread
901 * reservations to see if we need to keep the pmc flag set for the given
902 * thread or not.
903 */
904 lck_spin_lock(&reservations_spin);
905
906 queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) {
907 if(tmp->thread == thread) {
908 newFlag = TRUE;
909 break;
910 }
911 }
912
913 lck_spin_unlock(&reservations_spin);
914 }
915
916 if(newFlag) {
917 OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud);
918 } else {
919 OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud);
920 }
921 }
922
923 /*
924 * This operation is (worst case) O(N*M) where N is number of threads in the
925 * given task, and M is the number of thread reservations in our system.
926 */
927 static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
928 assert(task);
929 thread_t thread = NULL;
930
931 if(newFlag) {
932 OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
933 } else {
934 OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
935 }
936
937 task_lock(task);
938
939 queue_iterate(&task->threads, thread, thread_t, task_threads) {
940 /* propagate the task's mask down to each thread */
941 pmc_internal_update_thread_flag(thread, newFlag);
942 }
943
944 task_unlock(task);
945 }
946
947 /*
948 * pmc_internal_reservation_add adds a reservation to the global tracking queues after
949 * ensuring there are no reservation conflicts. To do this, it takes all the
950 * spin locks for all the queue (to ensure no other core goes and adds a
951 * reservation for the same pmc to a queue that has already been checked).
952 */
953 static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
954 assert(resv);
955
956 boolean_t ret = FALSE;
957
958 /* always lock all three in the same order */
959 lck_spin_lock(&reservations_spin);
960
961 /* Check if the reservation can be added without conflicts */
962 if(pmc_internal_reservation_validate_for_pmc(resv)) {
963
964 /* add reservation to appropriate scope */
965 switch(PMC_FLAG_SCOPE(resv->flags)) {
966 case PMC_FLAG_SCOPE_SYSTEM:
967 /* Simply add it to the system queue */
968 pmc_internal_reservation_enqueue(system_reservations, resv);
969 system_reservation_count++;
970
971 lck_spin_unlock(&reservations_spin);
972
973 break;
974
975 case PMC_FLAG_SCOPE_TASK:
976 assert(resv->task);
977
978 /* Not only do we enqueue it in our local queue for tracking */
979 pmc_internal_reservation_enqueue(task_reservations, resv);
980 task_reservation_count++;
981
982 lck_spin_unlock(&reservations_spin);
983
984 /* update the task mask, and propagate it to existing threads */
985 pmc_internal_update_task_flag(resv->task, TRUE);
986 break;
987
988 /* Thread-switched counter */
989 case PMC_FLAG_SCOPE_THREAD:
990 assert(resv->thread);
991
992 /*
993 * Works the same as a task-switched counter, only at
994 * thread-scope
995 */
996
997 pmc_internal_reservation_enqueue(thread_reservations, resv);
998 thread_reservation_count++;
999
1000 lck_spin_unlock(&reservations_spin);
1001
1002 pmc_internal_update_thread_flag(resv->thread, TRUE);
1003 break;
1004 }
1005
1006 ret = TRUE;
1007 } else {
1008 lck_spin_unlock(&reservations_spin);
1009 }
1010
1011 return ret;
1012 }
1013
1014 static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
1015 uint32_t * cores;
1016 size_t core_cnt;
1017
1018 /* Get the list of accessible cores */
1019 if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
1020 boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);
1021
1022 /* Fast case: the PMC is only accessible from one core and we happen to be on it */
1023 if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
1024 action_func(reservation);
1025 } else {
1026 /* Call action_func on every accessible core */
1027 #if defined(__i386__) || defined(__x86_64__)
1028 size_t ii;
1029 cpumask_t mask = 0;
1030
1031 /* Build a mask for the accessible cores */
1032 if (core_cnt > 0) {
1033 for (ii = 0; ii < core_cnt; ii++) {
1034 mask |= cpu_to_cpumask(cores[ii]);
1035 }
1036 } else {
1037 /* core_cnt = 0 really means all cpus */
1038 mask = CPUMASK_ALL;
1039 }
1040 mp_cpus_call(mask, ASYNC, action_func, reservation);
1041 #else
1042 #error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture
1043 #endif
1044 }
1045
1046 ml_set_interrupts_enabled(intrs_enabled);
1047 }
1048
1049 }
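
/*
 * Typical use of the broadcast helper (see pmc_reservation_interrupt() below):
 *
 *	pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
 *
 * runs the stop handler on every core that can access reservation->pmc, either
 * directly when only the current core qualifies, or via mp_cpus_call otherwise.
 */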
1050
1051 /*
1052 * pmc_internal_reservation_remove removes the given reservation from the appropriate
1053 * reservation queue according to its scope.
1054 *
1055 * NOTE: The scope flag must have been set for this method to function.
1056 */
1057 static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
1058 assert(resv);
1059
1060 /*
1061 * Due to the way the macros are written, we can't just blindly queue-remove
1062 * the reservation without knowing which queue it's in. We figure this out
1063 * using the reservation's scope flags.
1064 */
1065
1066 /* Lock the global spin lock */
1067 lck_spin_lock(&reservations_spin);
1068
1069 switch(PMC_FLAG_SCOPE(resv->flags)) {
1070
1071 case PMC_FLAG_SCOPE_SYSTEM:
1072 pmc_internal_reservation_dequeue(system_reservations, resv);
1073 system_reservation_count--;
1074
1075 lck_spin_unlock(&reservations_spin);
1076
1077 break;
1078
1079 case PMC_FLAG_SCOPE_TASK:
1080 /* remove from the global queue */
1081 pmc_internal_reservation_dequeue(task_reservations, resv);
1082 task_reservation_count--;
1083
1084 /* unlock the global */
1085 lck_spin_unlock(&reservations_spin);
1086
1087 /* Recalculate task's counter mask */
1088 pmc_internal_update_task_flag(resv->task, FALSE);
1089
1090 break;
1091
1092 case PMC_FLAG_SCOPE_THREAD:
1093 pmc_internal_reservation_dequeue(thread_reservations, resv);
1094 thread_reservation_count--;
1095
1096 lck_spin_unlock(&reservations_spin);
1097
1098 /* recalculate the thread's counter mask */
1099 pmc_internal_update_thread_flag(resv->thread, FALSE);
1100
1101 break;
1102 }
1103 }
1104
1105 /* Reservation State Machine
1106 *
1107 * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a
1108 * set of 9 events to provide MP-safe bookkeeping and control flow. The 3-tuple is comprised
1109 * of a state, a count of active contexts, and a set of modifier flags. A state machine defines
1110 * the possible transitions at each event point given the current 3-tuple. Atomicity is handled
1111 * by reading the current 3-tuple, applying the transformations indicated by the state machine
1112 * and then attempting to OSCompareAndSwap the transformed value. If the OSCompareAndSwap fails,
1113 * the process is repeated until either the OSCompareAndSwap succeeds or not valid transitions are
1114 * available.
1115 *
1116 * The state machine is described using tuple notation for the current state and a related notation
1117 * for describing the transformations. For conciseness, the flag and state names are abbreviated as
1118 * follows:
1119 *
1120 * states:
1121 * S = STOP
1122 * CR = CAN_RUN
1123 * L = LOAD
1124 * R = RUN
1125 * ST = STORE
1126 * I = INTERRUPT
1127 * D = DEALLOC
1128 *
1129 * flags:
1130 *
1131 * S = STOPPING
1132 * D = DEALLOCING
1133 * I = INTERRUPTING
1134 *
1135 * The tuple notation is formed from the following pattern:
1136 *
1137 * tuple = < state, active-context-count, flags >
1138 * state = S | CR | L | R | ST | I | D
1139 * active-context-count = 0 | >0 | 1 | >1
1140 * flags = flags flag | blank
1141 * flag = S | D | I
1142 *
1143 * The transform notation is similar, but only describes the modifications made to the current state.
1144 * The notation is formed from the following pattern:
1145 *
1146 * transform = < state, active-context-count, flags >
1147 * state = S | CR | L | R | ST | I | D
1148 * active-context-count = + | - | blank
1149 * flags = flags flag | flags !flag | blank
1150 * flag = S | D | I
1151 *
1152 * And now for the state machine:
1153 * State Start Stop Free Interrupt End Interrupt Context In Context Out Load Finished Store Finished
1154 * <CR, 0, > <S, , > <D, , > <L, +, >
1155 * <D, 0, >
1156 * <D, 1, D> < , -, !D>
1157 * <D, >1, D> < , -, >
1158 * <I, 0, D> <D, , !D>
1159 * <I, 0, S> < , , !S> < , , !SD> <S, , !S>
1160 * <I, 0, > < , , S> < , , D> <CR, , >
1161 * <L, 1, D> <ST, -, >
1162 * <L, 1, ID> <ST, -, >
1163 * <L, 1, IS> < , , !SD> <ST, -, >
1164 * <L, 1, S> < , , !S> < , , !SD> <ST, -, >
1165 * <L, 1, > < , , S> < , , D> < , , IS> < , +, > <R, , >
1166 * <L, >1, D> < , -, > <R, -, >
1167 * <L, >1, ID> < , -, > <R, -, >
1168 * <L, >1, IS> < , , !SD> < , -, > <R, -, >
1169 * <L, >1, S> < , , !S> < , , !SD> < , -, > <R, -, >
1170 * <L, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, > <R, , >
1171 * <R, 1, D> <ST, -, >
1172 * <R, 1, ID> <ST, -, >
1173 * <R, 1, IS> < , , !SD> <ST, -, >
1174 * <R, 1, S> < , , !S> < , , !SD> <ST, -, >
1175 * <R, 1, > < , , S> < , , D> < , , IS> < , +, > <ST, -, >
1176 * <R, >1, D> < , -, >
1177 * <R, >1, ID> < , -, >
1178 * <R, >1, IS> < , , !SD> < , -, >
1179 * <R, >1, S> < , , !S> < , , !SD> < , -, >
1180 * <R, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, >
1181 * <S, 0, > <CR, , > <D, , >
1182 * <S, 1, ID> <I, -, !I>
1183 * <S, 1, IS> < , , !SD> <I, -, !I>
1184 * <S, 1, S> < , , !S> <D, , !SD> < , -, !S>
1185 * <S, 1, > < , , S> <D, , D> <L, +, > <CR, -, >
1186 * <S, >1, ID> < , -, >
1187 * <S, >1, IS> < , , !SD> < , -, >
1188 * <S, >1, S> < , , !S> <D, , !SD> < , -, >
1189 * <S, >1, > < , , S> <D, , D> <L, +, > < , -, >
1190 * <ST, 0, D> <D, , !D>
1191 * <ST, 0, ID> <I, , !I>
1192 * <ST, 0, IS> < , , !SD> <I, , !I>
1193 * <ST, 0, S> < , , !S> < , , !SD> <S, , !S>
1194 * <ST, 0, > < , , S> < , , D> < , , IS> < , +, > <CR, , >
1195 * <ST, >0, D> < , -, > <D, , >
1196 * <ST, >0, ID> < , -, > <S, , >
1197 * <ST, >0, IS> < , , !SD> < , -, > <S, , >
1198 * <ST, >0, S> < , , !S> < , , !SD> < , -, > <S, , >
1199 * <ST, >0, > < , , S> < , , D> < , , IS> < , +, > < , -, > <L, , >
1200 */
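
/*
 * A minimal sketch (not part of the build) of how the table above is applied
 * atomically; this is the read/transform/compare-and-swap loop that
 * pmc_internal_reservation_move_for_event() implements below:
 *
 *	pmc_state_t old_state, new_state;
 *
 *	do {
 *		old_state = reservation->state;
 *		new_state = pmc_internal_reservation_next_state(old_state, event);
 *	} while (new_state != PMC_STATE_INVALID &&
 *		!OSCompareAndSwap(old_state, new_state, &(reservation->state)));
 */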
1201
1202 static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) {
1203 uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0);
1204
1205 switch (event) {
1206 case PMC_STATE_EVENT_START:
1207 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1208 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1209 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1210 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1211 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1212 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1213 new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING);
1214 break;
1215 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1216 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1217 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1218 }
1219 break;
1220 }
1221 break;
1222 case PMC_STATE_EVENT_STOP:
1223 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1224 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1225 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1226 break;
1227 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1228 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1229 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1230 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1231 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
1232 break;
1233 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1234 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1235 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
1236 }
1237 break;
1238 }
1239 break;
1240 case PMC_STATE_EVENT_FREE:
1241 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1242 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1243 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1244 break;
1245 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1246 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1247 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1248 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1249 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1250 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1251 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1252 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1253 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
1254 break;
1255 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1256 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1257 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1258 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1259 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
1260 break;
1261 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1262 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
1263 break;
1264 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1265 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1266 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
1267 } else {
1268 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1269 }
1270 break;
1271 }
1272 break;
1273 case PMC_STATE_EVENT_INTERRUPT:
1274 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1275 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1276 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1277 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1278 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0);
1279 break;
1280 }
1281 break;
1282 case PMC_STATE_EVENT_END_OF_INTERRUPT:
1283 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1284 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING):
1285 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
1286 break;
1287 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1288 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
1289 break;
1290 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1291 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1292 break;
1293 }
1294 break;
1295 case PMC_STATE_EVENT_CONTEXT_IN:
1296 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1297 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1298 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
1299 break;
1300 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1301 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1302 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1303 new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0);
1304 break;
1305 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1306 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1307 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
1308 }
1309 break;
1310 }
1311 break;
1312 case PMC_STATE_EVENT_CONTEXT_OUT:
1313 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1314 case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING):
1315 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1316 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING);
1317 } else {
1318 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1319 }
1320 break;
1321 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
1322 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1323 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1324 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1325 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1326 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1327 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1328 }
1329 break;
1330 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING):
1331 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1332 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1333 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1334 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1335 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1336 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
1337 } else {
1338 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1339 }
1340 break;
1341 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1342 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1343 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1344 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING);
1345 } else {
1346 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1347 }
1348 break;
1349 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1350 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1351 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING);
1352 } else {
1353 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1354 }
1355 break;
1356 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1357 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1358 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1359 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0);
1360 } else {
1361 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1362 }
1363 }
1364 break;
1365 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
1366 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1367 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1368 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1369 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1370 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1371 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1372 }
1373 break;
1374 }
1375 break;
1376 case PMC_STATE_EVENT_LOAD_FINISHED:
1377 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1378 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
1379 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1380 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1381 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1382 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1383 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0);
1384 } else {
1385 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
1386 }
1387 break;
1388 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1389 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0);
1390 break;
1391 }
1392 break;
1393 case PMC_STATE_EVENT_STORE_FINISHED:
1394 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1395 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
1396 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1397 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
1398 } else {
1399 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1400 }
1401 break;
1402 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1403 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1404 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1405 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING);
1406 } else {
1407 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1408 }
1409 break;
1410 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1411 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1412 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
1413 } else {
1414 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1415 }
1416 break;
1417 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1418 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1419 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1420 } else {
1421 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0);
1422 }
1423 break;
1424 }
1425 break;
1426 }
1427
1428 return new_state;
1429 }
1430
1431 static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) {
1432 pmc_state_t oldState;
1433 pmc_state_t newState;
1434
1435 assert(reservation);
1436
1437 /* Determine what state change, if any, we need to do. Keep trying until either we succeed in making a transition
1438 * or there is no valid move.
1439 */
1440 do {
1441 oldState = reservation->state;
1442 newState = pmc_internal_reservation_next_state(oldState, event);
1443 } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state)));
1444
1445 if (newState != PMC_STATE_INVALID) {
1446 COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event));
1447 } else {
1448 COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event));
1449 }
1450
1451 if (old_state_out != NULL) {
1452 *old_state_out = oldState;
1453 }
1454
1455 return newState;
1456 }
1457
1458 static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) {
1459 assert(reservation);
1460 pmc_state_t newState;
1461 pmc_state_t oldState;
1462
1463 /* Clear that this reservation was active when this cpu did its last context in */
1464 OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in));
1465
1466 /* Move the state machine */
1467 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) {
1468 return;
1469 }
1470
1471 /* Do any actions required based on the state change */
1472 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) {
1473 /* Just moved into STORE, so store the reservation. */
1474 pmc_internal_reservation_store(reservation);
1475 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1476 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1477 thread_wakeup((event_t)reservation);
1478 }
1479
1480 }
1481
1482 static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) {
1483 assert(reservation);
1484 pmc_state_t oldState;
1485 pmc_state_t newState;
1486
1487 /* Move the state machine */
1488 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) {
1489 return;
1490 }
1491
1492 /* Mark that the reservation was active when this cpu did its last context in */
1493 OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
1494
1495 /* Do any actions required based on the state change */
1496 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) {
1497 /* Just moved into LOAD, so load the reservation. */
1498 pmc_internal_reservation_load(reservation);
1499 }
1500
1501 }
1502
1503 static void pmc_internal_reservation_store(pmc_reservation_t reservation) {
1504 assert(reservation);
1505 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE);
1506
1507 assert(reservation->pmc);
1508 assert(reservation->config);
1509
1510 pmc_state_t newState;
1511 kern_return_t ret = KERN_SUCCESS;
1512
1513 pmc_t store_pmc = reservation->pmc;
1514 pmc_object_t store_pmc_obj = store_pmc->object;
1515 perf_monitor_t store_pm = store_pmc->monitor;
1516
1517 /*
1518 * Instruct the Perf Monitor that contains this counter to turn
1519 * off the global disable for this counter.
1520 */
1521 ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1);
1522 if(KERN_SUCCESS != ret) {
1523 COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret);
1524 return;
1525 }
1526
1527 /* Instruct the counter to disable itself */
1528 ret = store_pmc->methods.disable(store_pmc_obj);
1529 if(KERN_SUCCESS != ret) {
1530 COUNTER_DEBUG(" [error] disable: 0x%x\n", ret);
1531 }
1532
1533 /* store the counter value into the reservation's stored count */
1534 ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value);
1535 if(KERN_SUCCESS != ret) {
1536 COUNTER_DEBUG(" [error] get_count: 0x%x\n", ret);
1537 return;
1538 }
1539
1540 /* Advance the state machine now that the STORE is finished */
1541 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) {
1542 return;
1543 }
1544
1545 /* Do any actions required based on the state change */
1546 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) {
1547 /* Just moved into LOAD, so load the reservation. */
1548 pmc_internal_reservation_load(reservation);
1549 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1550 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1551 thread_wakeup((event_t)reservation);
1552 }
1553
1554 }
1555
1556 static void pmc_internal_reservation_load(pmc_reservation_t reservation) {
1557 assert(reservation);
1558 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD);
1559
1560 pmc_state_t newState;
1561 kern_return_t ret = KERN_SUCCESS;
1562
1563 assert(reservation->pmc);
1564 assert(reservation->config);
1565
1566 pmc_t load_pmc = reservation->pmc;
1567 pmc_object_t load_pmc_obj = load_pmc->object;
1568 perf_monitor_t load_pm = load_pmc->monitor;
1569
1570 /* Set the control register up with the stored configuration */
1571 ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object);
1572 if(KERN_SUCCESS != ret) {
1573 COUNTER_DEBUG(" [error] set_config: 0x%x\n", ret);
1574 return;
1575 }
1576
1577 /* load the counter value */
1578 ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value);
1579 if(KERN_SUCCESS != ret) {
1580 COUNTER_DEBUG(" [error] set_count: 0x%x\n", ret);
1581 return;
1582 }
1583
1584 /* Locally enable the counter */
1585 ret = load_pmc->methods.enable(load_pmc_obj);
1586 if(KERN_SUCCESS != ret) {
1587 COUNTER_DEBUG(" [error] enable: 0x%x\n", ret);
1588 return;
1589 }
1590
1591 /*
1592 * Instruct the Perf Monitor containing the pmc to enable the
1593 * counter.
1594 */
1595 ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1);
1596 if(KERN_SUCCESS != ret) {
1597 COUNTER_DEBUG(" [error] enable_counters: 0x%x\n", ret);
1598 /* not on the hardware. */
1599 return;
1600 }
1601
1602 /* Advance the state machine now that the LOAD is finished */
1603 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) {
1604 return;
1605 }
1606
1607 /* Do any actions required based on the state change */
1608 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) {
1609 /* Just moved into STORE, so store the reservation. */
1610 pmc_internal_reservation_store(reservation);
1611 }
1612
1613 }
1614
1615 /*
1616 * pmc_accessible_from_core will return TRUE if the given @pmc is directly
1617 * (e.g., hardware) readable from the given logical core.
1618 *
1619 * NOTE: This method is interrupt safe.
1620 */
1621 static inline boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
1622 boolean_t ret = FALSE;
1623
1624 assert(pmc);
1625
1626 ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);
1627
1628 return ret;
1629 }
1630
1631 static void pmc_internal_reservation_start_cpu(void * arg) {
1632 pmc_reservation_t reservation = (pmc_reservation_t)arg;
1633
1634 assert(reservation);
1635
1636
1637 if (pmc_internal_reservation_matches_context(reservation)) {
1638 /* We are in context, but the reservation may have already had the context_in method run. Attempt
1639 * to set this cpu's bit in the active_last_context_in mask. If we set it, call context_in.
1640 */
1641 uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
1642
1643 if ((oldMask & (1U << cpu_number())) == 0) {
1644 COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number());
1645
1646 pmc_internal_reservation_context_in(reservation);
1647 }
1648 }
1649 }
1650
1651 static void pmc_internal_reservation_stop_cpu(void * arg) {
1652 pmc_reservation_t reservation = (pmc_reservation_t)arg;
1653
1654 assert(reservation);
1655
1656
1657 if (pmc_internal_reservation_matches_context(reservation)) {
1658 COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number());
1659
1660 pmc_internal_reservation_context_out(reservation);
1661 }
1662 }
1663
1664 /*!fn
1665 * pmc_reservation_interrupt is called when a PMC reservation which was setup
1666 * with an interrupt threshold counts the requested number of events. When the
1667 * underlying counter hits the threshold, an interrupt is generated, and this
1668 * method is called. This method marks the reservation as stopped, and passes
1669 * control off to the user-registered callback method, along with the
1670 * reservation (so that the user can, for example, write a 0 to the counter, and
1671 * restart the reservation).
1672 * This method assumes the reservation has a valid pmc_config_t within.
1673 *
1674 * @param target The pmc_reservation_t that caused the interrupt.
1675 * @param refCon User specified reference constant.
1676 */
1677 static void pmc_reservation_interrupt(void *target, void *refCon) {
1678 pmc_reservation_t reservation = (pmc_reservation_t)target;
1679 pmc_state_t newState;
1680 uint64_t timeout;
1681 uint32_t spins;
1682
1683 assert(reservation);
1684
1685 /* Move the state machine */
1686 if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) {
1687 return;
1688 }
1689
1690 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
1691 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
1692 * on every cpu that can access the PMC.
1693 */
1694 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
1695
1696 /* Spin waiting for the state to turn to INTERRUPT */
1697 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
1698 timeout += mach_absolute_time();
1699 spins = 0;
1700 while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) {
1701 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
1702 if (++spins > PMC_SPIN_THRESHOLD) {
1703 if (mach_absolute_time() > timeout) {
1704 pmc_spin_timeout_count++;
1705 assert(0);
1706 }
1707 }
1708
1709 cpu_pause();
1710 }
1711
1712 assert(reservation->config);
1713 assert(reservation->config->method);
1714
1715 /* Call the registered callback handler */
1716 #ifdef DEBUG_COUNTERS
1717 uint64_t start = mach_absolute_time();
1718 #endif /* DEBUG_COUNTERS */
1719
1720 (void)reservation->config->method(reservation, refCon);
1721
1722 #ifdef DEBUG_COUNTERS
1723 uint64_t end = mach_absolute_time();
1724 if((end - start) > 5000ULL) {
1725 kprintf("%s - user method %p took %llu ns\n", __FUNCTION__,
1726 reservation->config->method, (end - start));
1727 }
1728 #endif /* DEBUG_COUNTERS */
1729
1730 /* Move the state machine */
1731 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) {
1732 return;
1733 }
1734
1735 /* Do any post-move actions necessary */
1736 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) {
1737 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
1738 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1739 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1740 thread_wakeup((event_t)reservation);
1741 }
1742 }
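
/*
 * Illustrative sketch (kept out of the build): a minimal PMI handler of the
 * kind described above.  The handler name, the refCon usage, and the
 * assumption that the prototype matches pmc_interrupt_method_t (declared in
 * pmc.h) are all hypothetical.
 */
#if 0
static void my_pmi_handler(pmc_reservation_t reservation, void *refCon) {
	uint64_t *fire_count = (uint64_t *)refCon;	/* hypothetical refCon */

	if(fire_count) {
		(*fire_count)++;
	}

	/* Re-arm: clear the counter, then restart the now-stopped reservation. */
	(void)pmc_reservation_write(reservation, 0ULL);
	(void)pmc_reservation_start(reservation);
}
#endif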
1743
1744 /*
1745  * Apple-private KPI for Apple kexts (IOProfileFamily) only
1746 */
1747
1748 #if 0
1749 #pragma mark -
1750 #pragma mark IOProfileFamily private KPI
1751 #endif
1752
1753 /*
1754 * perf_monitor_register registers a new Performance Monitor, and its associated
1755 * callback methods. The given perf_monitor_object_t is the first argument to
1756 * each callback when they are called.
1757 */
1758 kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
1759 perf_monitor_methods_t *methods) {
1760 int cpu = -1;
1761
1762 COUNTER_DEBUG("registering perf monitor %p\n", monitor);
1763
1764 if(!monitor || !methods) {
1765 return KERN_INVALID_ARGUMENT;
1766 }
1767
1768 /* Protect against out-of-date driver kexts */
1769 if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) {
1770 return KERN_INVALID_ARGUMENT;
1771 }
1772
1773 /* If the monitor requires idle notifications, ensure that it is
1774 * accessible from a single core only.
1775 */
1776 	if ((methods->flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) && methods->accessible_cores) {
1777 uint32_t *cores;
1778 size_t core_cnt;
1779
1780 if (KERN_SUCCESS == methods->accessible_cores(monitor, &cores, &core_cnt)) {
1781 /*
1782 * Guard against disabled cores - monitors will always match and
1783 * attempt registration, irrespective of 'cpus=x' boot-arg.
1784 */
1785 if ((core_cnt == 1) && (cores[0] < (uint32_t)ml_get_max_cpus())) {
1786 cpu = cores[0];
1787 } else {
1788 return KERN_INVALID_ARGUMENT;
1789 }
1790 }
1791 }
1792
1793 /* All methods are required */
1794 	if(!methods->accessible_cores ||
1795 !methods->enable_counters || !methods->disable_counters ||
1796 !methods->on_idle || !methods->on_idle_exit) {
1797 return KERN_INVALID_ARGUMENT;
1798 }
1799
1800 /* prevent dupes. */
1801 perf_monitor_t dupe = perf_monitor_find(monitor);
1802 if(dupe) {
1803 COUNTER_DEBUG("Duplicate registration for %p\n", monitor);
1804 perf_monitor_deallocate(dupe);
1805 return KERN_FAILURE;
1806 }
1807
1808 perf_monitor_t pm = perf_monitor_alloc();
1809 if(!pm) {
1810 return KERN_RESOURCE_SHORTAGE;
1811 }
1812
1813 /* initialize the object */
1814 perf_monitor_init(pm, cpu);
1815
1816 /* copy in the registration info */
1817 pm->object = monitor;
1818 memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t));
1819
1820 /* place it in the tracking queues */
1821 perf_monitor_enqueue(pm);
1822
1823 /* debug it */
1824 PRINT_PERF_MON(pm);
1825
1826 return KERN_SUCCESS;
1827 }
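
/*
 * Illustrative registration sketch (kept out of the build): how a
 * hypothetical driver kext might call perf_monitor_register().  The my_pm_*
 * callbacks and the my_monitor token are assumptions; their exact prototypes
 * are declared in pmc.h, and every callback validated above must be supplied.
 */
#if 0
static kern_return_t my_driver_register_monitor(perf_monitor_object_t my_monitor) {
	perf_monitor_methods_t methods;

	bzero(&methods, sizeof(methods));

	/* Out-of-date kexts are rejected by the version check above. */
	methods.perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION;
	methods.flags = PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING;

	/* Hypothetical driver callbacks; all of them are required. */
	methods.accessible_cores = my_pm_accessible_cores;
	methods.enable_counters = my_pm_enable_counters;
	methods.disable_counters = my_pm_disable_counters;
	methods.on_idle = my_pm_on_idle;
	methods.on_idle_exit = my_pm_on_idle_exit;

	return perf_monitor_register(my_monitor, &methods);
}
#endif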
1828
1829 /*
1830 * perf_monitor_unregister unregisters a previously registered Perf Monitor,
1831 * looking it up by reference pointer (the same that was used in
1832 * perf_monitor_register()).
1833 */
1834 kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) {
1835 kern_return_t ret = KERN_FAILURE;
1836
1837 COUNTER_DEBUG("unregistering perf monitor %p\n", monitor);
1838
1839 if(!monitor) {
1840 return KERN_INVALID_ARGUMENT;
1841 }
1842
1843 perf_monitor_t pm = perf_monitor_find(monitor);
1844 if(pm) {
1845 /* Remove it from the queues. */
1846 perf_monitor_dequeue(pm);
1847
1848 /* drop extra retain from find */
1849 perf_monitor_deallocate(pm);
1850
1851 /* and release the object */
1852 perf_monitor_deallocate(pm);
1853
1854 ret = KERN_SUCCESS;
1855 } else {
1856 COUNTER_DEBUG("could not find a registered pm that matches!\n");
1857 }
1858
1859 return ret;
1860 }
1861
1862 /*
1863 * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is
1864 * associated with a Perf Monitor. Perf Monitors are looked up by the reference
1865 * pointer that was used to previously register them.
1866 *
1867 * PMCs are registered with a reference pointer (@pmc_object), and a set of
1868 * callback methods. When the given callback methods are called from xnu, the
1869 * first argument will always be the reference pointer used to register the PMC.
1870 *
1871 * NOTE: @monitor must have been successfully registered via
1872 * perf_monitor_register before this method will succeed.
1873 */
1874 kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object,
1875 pmc_methods_t *methods, void *object) {
1876
1877 COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
1878
1879 if(!monitor || !pmc_object || !methods || !object) {
1880 return KERN_INVALID_ARGUMENT;
1881 }
1882
1883 /* Prevent version mismatches */
1884 if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) {
1885 COUNTER_DEBUG("version mismatch\n");
1886 return KERN_INVALID_ARGUMENT;
1887 }
1888
1889 /* All methods are required. */
1890 if(!methods->create_config ||
1891 !methods->free_config ||
1892 !methods->config_set_value ||
1893 !methods->config_set_threshold ||
1894 !methods->config_set_handler ||
1895 !methods->set_config ||
1896 !methods->get_monitor ||
1897 !methods->get_name ||
1898 !methods->accessible_from_core ||
1899 !methods->accessible_cores ||
1900 !methods->get_count ||
1901 !methods->set_count ||
1902 !methods->disable ||
1903 !methods->enable ||
1904 !methods->open ||
1905 !methods->close) {
1906 return KERN_INVALID_ARGUMENT;
1907 }
1908
1909 /* make sure this perf monitor object is already registered */
1910 /*
1911 * NOTE: this adds a reference to the parent, so we'll have to drop it in
1912 * any failure code paths from here on out.
1913 */
1914 perf_monitor_t pm = perf_monitor_find(monitor);
1915 if(!pm) {
1916 COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor);
1917 return KERN_INVALID_ARGUMENT;
1918 }
1919
1920 /* make a new pmc */
1921 pmc_t pmc = pmc_alloc();
1922 if(!pmc) {
1923 /* drop the extra reference from perf_monitor_find() */
1924 perf_monitor_deallocate(pm);
1925 return KERN_RESOURCE_SHORTAGE;
1926 }
1927
1928 /* init it */
1929 pmc_init(pmc);
1930
1931 pmc->object = pmc_object;
1932 pmc->open_object = object;
1933
1934 /* copy the callbacks in */
1935 memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t));
1936
1937 pmc->monitor = pm;
1938
1939 perf_monitor_add_pmc(pmc->monitor, pmc);
1940
1941 /* enqueue it in our tracking queue */
1942 pmc_enqueue(pmc);
1943
1944 /* drop extra reference from perf_monitor_find() */
1945 perf_monitor_deallocate(pm);
1946
1947 return KERN_SUCCESS;
1948 }
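
/*
 * Illustrative ordering sketch (kept out of the build): a PMC can only be
 * registered after its parent Perf Monitor.  The my_* names and the
 * my_fill_pmc_methods() helper are hypothetical; a real driver populates
 * every pmc_methods_t callback validated above.
 */
#if 0
static kern_return_t my_driver_register_pmc(perf_monitor_object_t my_monitor,
	pmc_object_t my_pmc_object, void *my_owner) {
	pmc_methods_t methods;

	my_fill_pmc_methods(&methods);	/* hypothetical: fills in every callback */
	methods.pmc_methods_version = MACH_PMC_METHODS_VERSION;

	/* perf_monitor_register(my_monitor, ...) must already have succeeded. */
	return pmc_register(my_monitor, my_pmc_object, &methods, my_owner);
}
#endif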
1949
1950 /*
1951  * pmc_unregister unregisters a previously registered PMC, looking it up by
1952  * *both* the reference pointer of the Perf Monitor it was created with and
1953  * the PMC's own reference pointer.
1954 */
1955 kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) {
1956 COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
1957
1958 if(!monitor || !pmc_object) {
1959 return KERN_INVALID_ARGUMENT;
1960 }
1961
1962 pmc_t pmc = pmc_find(pmc_object);
1963 if(!pmc) {
1964 COUNTER_DEBUG("Could not find a matching pmc.\n");
1965 return KERN_FAILURE;
1966 }
1967
1968 /* remove it from the global queue */
1969 pmc_dequeue(pmc);
1970
1971 perf_monitor_remove_pmc(pmc->monitor, pmc);
1972
1973 /* remove extra reference count from pmc_find() */
1974 pmc_deallocate(pmc);
1975
1976 /* dealloc the pmc */
1977 pmc_deallocate(pmc);
1978
1979 return KERN_SUCCESS;
1980 }
1981
1982 static void perf_monitor_reservation_add(perf_monitor_t monitor) {
1983 assert(monitor);
1984 OSIncrementAtomic(&(monitor->reservedCounters));
1985 }
1986
1987 static void perf_monitor_reservation_remove(perf_monitor_t monitor) {
1988 assert(monitor);
1989 OSDecrementAtomic(&(monitor->reservedCounters));
1990 }
1991
1992 #if 0
1993 #pragma mark -
1994 #pragma mark KPI
1995 #endif
1996
1997 /*
1998 * Begin in-kernel and in-kext KPI methods
1999 */
2000
2001 /*
2002 * pmc_create_config creates a new configuration area from a given @pmc.
2003 *
2004 * NOTE: This method is not interrupt safe.
2005 */
2006 kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) {
2007 pmc_config_t tmp = NULL;
2008
2009 if(!pmc || !config) {
2010 return KERN_INVALID_ARGUMENT;
2011 }
2012
2013 pmc_reference(pmc);
2014
2015 tmp = pmc_config_alloc(pmc);
2016 if(tmp) {
2017 tmp->object = pmc->methods.create_config(pmc->object);
2018
2019 if(!tmp->object) {
2020 pmc_config_free(pmc, tmp);
2021 tmp = NULL;
2022 } else {
2023 tmp->interrupt_after_value = 0ULL;
2024 tmp->method = NULL;
2025 tmp->refCon = NULL;
2026 }
2027 }
2028
2029 pmc_deallocate(pmc);
2030
2031 if(!tmp) {
2032 return KERN_RESOURCE_SHORTAGE;
2033 }
2034
2035 *config = tmp;
2036
2037 return KERN_SUCCESS;
2038 }
2039
2040 /*
2041 * pmc_free_config frees a configuration area created from a given @pmc
2042 *
2043 * NOTE: This method is not interrupt safe.
2044 */
2045 void pmc_free_config(pmc_t pmc, pmc_config_t config) {
2046 assert(pmc);
2047 assert(config);
2048
2049 pmc_reference(pmc);
2050
2051 pmc_config_free(pmc, config);
2052
2053 pmc_deallocate(pmc);
2054 }
2055
2056 /*
2057 * pmc_config_set_value sets up configuration area key-value pairs. These pairs
2058 * are to be either pre-known, or looked up via CoreProfile.framework.
2059 *
2060 * NOTE: This method is not interrupt safe.
2061 */
2062 kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config,
2063 uint8_t id, uint64_t value) {
2064
2065 kern_return_t ret = KERN_INVALID_ARGUMENT;
2066
2067 if(!pmc || !config) {
2068 return ret;
2069 }
2070
2071 pmc_reference(pmc);
2072
2073 ret = pmc->methods.config_set_value(config->object, id, value);
2074
2075 pmc_deallocate(pmc);
2076
2077 return ret;
2078 }
2079
2080 /*
2081 * pmc_config_set_interrupt_threshold modifies a config object, instructing
2082 * the pmc that it should generate a call to the given pmc_interrupt_method_t
2083 * after the counter counts @threshold events.
2084 *
2085 * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt
2086 * as the first argument when the interrupt handler is invoked, and the given
2087 * @refCon (which may be NULL) as the second.
2088 *
2089 * See pmc_interrupt_method_t.
2090 *
2091 * NOTE: This method is not interrupt safe.
2092 */
2093 kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config,
2094 uint64_t threshold, pmc_interrupt_method_t method, void *refCon) {
2095 kern_return_t ret = KERN_INVALID_ARGUMENT;
2096
2097 if(!config || !pmc) {
2098 return ret;
2099 }
2100
2101 assert(config);
2102 assert(pmc);
2103
2104 pmc_reference(pmc);
2105
2106 do {
2107 /*
2108 * We have a minor annoyance to side-step here. The driver layer expects
2109 * the config to never change once a reservation has been taken out with
2110 * it. However, in order to have the PMI method have the reservation as
2111 * the first argument (in order to allow the user-method to, for
2112 * example, write a 0 to it, and restart it), we need to create the
2113 * pmc_reservation_t before setting it up in the config object.
2114 * We overcome this by caching the method in the pmc_config_t stand-in,
2115 * and mutating the pmc_config_object_t just before returning a
2116 * reservation (in pmc_reserve() and friends, below).
2117 */
2118
2119 /* might as well stash this away too. */
2120 config->interrupt_after_value = threshold;
2121 config->method = method;
2122 config->refCon = refCon;
2123
2124 ret = KERN_SUCCESS;
2125
2126 }while(0);
2127
2128 pmc_deallocate(pmc);
2129
2130 return ret;
2131 }
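
/*
 * Illustrative configuration sketch (kept out of the build): create a config,
 * program it, and arm a PMI.  The event id (0) and event-select value are
 * hypothetical and hardware specific (real values come from the driver or
 * CoreProfile.framework), and my_pmi_handler is the hypothetical handler
 * sketched near pmc_reservation_interrupt() above.
 */
#if 0
static kern_return_t my_build_config(pmc_t pmc, pmc_config_t *out_config) {
	pmc_config_t config = NULL;
	kern_return_t ret;

	ret = pmc_create_config(pmc, &config);
	if(KERN_SUCCESS != ret) {
		return ret;
	}

	/* Hypothetical key-value pair describing the event to count. */
	ret = pmc_config_set_value(pmc, config, 0 /* id */, 0x1ULL /* value */);
	if(KERN_SUCCESS != ret) {
		pmc_free_config(pmc, config);
		return ret;
	}

	/* Request a PMI after 1,000,000 events. */
	ret = pmc_config_set_interrupt_threshold(pmc, config, 1000000ULL,
		my_pmi_handler, NULL);
	if(KERN_SUCCESS != ret) {
		pmc_free_config(pmc, config);
		return ret;
	}

	*out_config = config;
	return KERN_SUCCESS;
}
#endif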
2132
2133 /*
2134 * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number
2135 * of pmc_t's returned. Callers should free this list with a call to
2136 * pmc_free_pmc_list().
2137 *
2138 * NOTE: This method is not interrupt safe.
2139 */
2140 kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
2141 pmc_t *array = NULL;
2142 pmc_t pmc = NULL;
2143 size_t count = 0UL;
2144
2145 do {
2146 /* Copy down (to the stack) the count of perf counters */
2147 vm_size_t size = perf_counters_count;
2148
2149 /* Allocate that sized chunk */
2150 array = (pmc_t *)kalloc(sizeof(pmc_t) * size);
2151 if(!array) {
2152 return KERN_RESOURCE_SHORTAGE;
2153 }
2154
2155 /* Take the spin lock */
2156 lck_spin_lock(&perf_counters_queue_spin);
2157
2158 /* verify the size didn't change while we were allocating */
2159 if(size != perf_counters_count) {
2160 /*
2161 * queue size has changed between alloc and now - go back and
2162 * make another pass.
2163 */
2164
2165 /* drop the lock */
2166 lck_spin_unlock(&perf_counters_queue_spin);
2167
2168 /* free the block */
2169 kfree(array, sizeof(pmc_t) * size);
2170 array = NULL;
2171 }
2172
2173 /* if we get here, and array is NULL, we try again. */
2174 }while(!array);
2175
2176 /* copy the bits out */
2177 queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
2178 /* copy out the pointer */
2179 array[count++] = pmc;
2180 }
2181
2182 lck_spin_unlock(&perf_counters_queue_spin);
2183
2184 /* return the list and the size */
2185 *pmcs = array;
2186 *pmcCount = count;
2187
2188 return KERN_SUCCESS;
2189 }
2190
2191 /*
2192 * pmc_free_pmc_list frees an array of pmc_t that has been returned from
2193 * pmc_get_pmc_list.
2194 *
2195 * NOTE: This method is not interrupt safe.
2196 */
2197 void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) {
2198 if(pmcs && pmcCount) {
2199 COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount);
2200
2201 kfree(pmcs, pmcCount * sizeof(pmc_t));
2202 }
2203 }
2204
2205 kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) {
2206 kern_return_t ret = KERN_INVALID_ARGUMENT;
2207
2208 if(!name || !pmcs || !pmcCount) {
2209 return ret;
2210 }
2211
2212 pmc_t *list = NULL;
2213 size_t count = 0UL;
2214
2215 if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) {
2216 size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL;
2217 size_t len = strlen(name);
2218
2219 for(ii = 0UL; ii < count; ii++) {
2220 const char *pmcName = pmc_get_name(list[ii]);
2221
2222 if(strlen(pmcName) < len) {
2223 /*
2224 * If the pmc name is shorter than the requested match, it's no
2225 * match, as we're looking for the most specific match(es).
2226 */
2227 continue;
2228 }
2229
2230 if(0 == strncmp(name, pmcName, len)) {
2231 pmc_t temp = list[ii];
2232
2233 // move matches to the head of the array.
2234 list[ii] = list[swapPtr];
2235 list[swapPtr] = temp;
2236 swapPtr++;
2237
2238 // keep a count of the matches
2239 matchCount++;
2240 }
2241 }
2242
2243 if(matchCount) {
2244 /*
2245 * If we have matches, they are all at the head of the array, so
2246 * just allocate enough space for @matchCount pmc_t's, and copy the
2247 * head of the array to the new allocation. Then free the old
2248 * allocation.
2249 */
2250
2251 pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount);
2252 if(result) {
2253 // copy the matches
2254 memcpy(result, list, sizeof(pmc_t) * matchCount);
2255
2256 ret = KERN_SUCCESS;
2257 }
2258
2259 pmc_free_pmc_list(list, count);
2260
2261 if(!result) {
2262 *pmcs = NULL;
2263 *pmcCount = 0UL;
2264 return KERN_RESOURCE_SHORTAGE;
2265 }
2266
2267 *pmcs = result;
2268 *pmcCount = matchCount;
2269 } else {
2270 *pmcs = NULL;
2271 *pmcCount = 0UL;
2272 }
2273 }
2274
2275 return ret;
2276 }
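
/*
 * Illustrative lookup sketch (kept out of the build): find PMCs by name
 * prefix and release the returned list.  The "my.monitor.pmc" name is
 * hypothetical.
 */
#if 0
static void my_dump_matching_pmcs(void) {
	pmc_t *pmcs = NULL;
	size_t count = 0UL, ii = 0UL;

	if(KERN_SUCCESS == pmc_find_by_name("my.monitor.pmc", &pmcs, &count)) {
		for(ii = 0UL; ii < count; ii++) {
			kprintf("matched pmc: %s\n", pmc_get_name(pmcs[ii]));
		}

		/* The caller owns the returned array. */
		pmc_free_pmc_list(pmcs, count);
	}
}
#endif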
2277
2278 /*
2279 * pmc_get_name returns a pointer (not copied) to the human-readable name of the
2280 * given pmc.
2281 *
2282 * NOTE: Driver authors must take care to not allocate during this method, as
2283 * this method *IS* interrupt safe.
2284 */
2285 const char *pmc_get_name(pmc_t pmc) {
2286 assert(pmc);
2287
2288 const char *name = pmc->methods.get_name(pmc->object);
2289
2290 return name;
2291 }
2292
2293 /*
2294 * pmc_get_accessible_core_list returns a pointer to an array of logical core
2295  * numbers (as well as the size of that array) that represent the logical cores
2296 * (hardware threads) from which the given @pmc can be accessed directly.
2297 *
2298 * NOTE: This method is interrupt safe.
2299 */
2300 kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores,
2301 size_t *logicalCoreCt) {
2302
2303 kern_return_t ret = KERN_INVALID_ARGUMENT;
2304
2305 if(!pmc || !logicalCores || !logicalCoreCt) {
2306 return ret;
2307 }
2308
2309 ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt);
2310
2311 return ret;
2312 }
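
/*
 * Illustrative sketch (kept out of the build): list the logical cores that
 * can program a given pmc.  'pmc' is assumed to come from pmc_find_by_name();
 * no KPI is provided to free the returned array, so it is treated as
 * driver-owned and is not freed here.
 */
#if 0
static void my_dump_accessible_cores(pmc_t pmc) {
	uint32_t *cores = NULL;
	size_t core_cnt = 0UL, ii = 0UL;

	if(KERN_SUCCESS == pmc_get_accessible_core_list(pmc, &cores, &core_cnt)) {
		for(ii = 0UL; ii < core_cnt; ii++) {
			kprintf("pmc %s accessible from logical core %u\n",
				pmc_get_name(pmc), cores[ii]);
		}
	}
}
#endif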
2313
2314 static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) {
2315 assert(resv);
2316 assert(resv->pmc);
2317 assert(config);
2318 assert(config->object);
2319
2320 	/* Only set up the PMI if a threshold and handler were configured; otherwise just return success. */
2321 if(config->interrupt_after_value && config->method) {
2322
2323 /* set the threshold */
2324 kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object,
2325 config->interrupt_after_value);
2326
2327 if(KERN_SUCCESS != ret) {
2328 /*
2329 * This is the most useful error message here, as this only happens
2330 * as a result of pmc_reserve*()
2331 */
2332 COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc);
2333 return FALSE;
2334 }
2335
2336 if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object,
2337 (void *)resv, &pmc_reservation_interrupt, config->refCon)) {
2338
2339 COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc);
2340 return FALSE;
2341 }
2342 }
2343
2344 return TRUE;
2345 }
2346
2347 /*
2348 * pmc_reserve will attempt to reserve the given @pmc, with a given
2349 * configuration object, for counting system-wide. This method will fail with
2350 * KERN_FAILURE if the given pmc is already reserved at any scope.
2351 *
2352 * This method consumes the given configuration object if it returns
2353 * KERN_SUCCESS. Any other return value indicates the caller
2354 * must free the config object via pmc_free_config().
2355 *
2356 * NOTE: This method is NOT interrupt safe.
2357 */
2358 kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config,
2359 pmc_reservation_t *reservation) {
2360
2361 if(!pmc || !config || !reservation) {
2362 return KERN_INVALID_ARGUMENT;
2363 }
2364
2365 pmc_reservation_t resv = reservation_alloc();
2366 if(!resv) {
2367 return KERN_RESOURCE_SHORTAGE;
2368 }
2369
2370 reservation_init(resv);
2371
2372 resv->flags |= PMC_FLAG_SCOPE_SYSTEM;
2373 resv->config = config;
2374
2375 	if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2376 		resv->config = NULL;
 		/* Also release the reservation so it is not leaked on this path */
 		reservation_free(resv);
2377 		return KERN_FAILURE;
2378 	}
2379
2380 /* enqueue reservation in proper place */
2381 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2382 /* Prevent free of config object */
2383 resv->config = NULL;
2384
2385 reservation_free(resv);
2386 return KERN_FAILURE;
2387 }
2388
2389 perf_monitor_reservation_add(pmc->monitor);
2390
2391 *reservation = resv;
2392
2393 return KERN_SUCCESS;
2394 }
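
/*
 * Illustrative system-scope sketch (kept out of the build): reserve a pmc
 * with a prepared config and start it counting.  my_build_config() is the
 * hypothetical helper sketched earlier; on success the config belongs to the
 * reservation, on failure it is still ours to free.
 */
#if 0
static kern_return_t my_start_system_counting(pmc_t pmc, pmc_reservation_t *out_resv) {
	pmc_config_t config = NULL;
	kern_return_t ret;

	ret = my_build_config(pmc, &config);
	if(KERN_SUCCESS != ret) {
		return ret;
	}

	ret = pmc_reserve(pmc, config, out_resv);
	if(KERN_SUCCESS != ret) {
		/* The reservation failed, so the config was not consumed. */
		pmc_free_config(pmc, config);
		return ret;
	}

	return pmc_reservation_start(*out_resv);
}
#endif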
2395
2396 /*
2397 * pmc_reserve_task will attempt to reserve the given @pmc with a given
2398 * configuration object, for counting when the given @task is running on any
2399 * logical core that can directly access the given @pmc. This method will fail
2400 * with KERN_FAILURE if the given pmc is already reserved at either system or
2401 * thread scope.
2402 *
2403 * This method consumes the given configuration object if it returns
2404 * KERN_SUCCESS. Any other return value indicates the caller
2405 * must free the config object via pmc_free_config().
2406 *
2407 * NOTE: You can reserve the same pmc for N different tasks concurrently.
2408 * NOTE: This method is NOT interrupt safe.
2409 */
2410 kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config,
2411 task_t task, pmc_reservation_t *reservation) {
2412
2413 if(!pmc || !config || !reservation || !task) {
2414 return KERN_INVALID_ARGUMENT;
2415 }
2416
2417 if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
2418 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
2419 return KERN_INVALID_ARGUMENT;
2420 }
2421
2422 pmc_reservation_t resv = reservation_alloc();
2423 if(!resv) {
2424 return KERN_RESOURCE_SHORTAGE;
2425 }
2426
2427 reservation_init(resv);
2428
2429 resv->flags |= PMC_FLAG_SCOPE_TASK;
2430 resv->task = task;
2431
2432 resv->config = config;
2433
2434 	if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2435 		resv->config = NULL;
 		/* Also release the reservation so it is not leaked on this path */
 		reservation_free(resv);
2436 		return KERN_FAILURE;
2437 	}
2438
2439 /* enqueue reservation in proper place */
2440 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2441 /* Prevent free of config object */
2442 resv->config = NULL;
2443
2444 reservation_free(resv);
2445 return KERN_FAILURE;
2446 }
2447
2448 perf_monitor_reservation_add(pmc->monitor);
2449
2450 *reservation = resv;
2451
2452 return KERN_SUCCESS;
2453 }
2454
2455 /*
2456 * pmc_reserve_thread will attempt to reserve the given @pmc with a given
2457 * configuration object, for counting when the given @thread is running on any
2458 * logical core that can directly access the given @pmc. This method will fail
2459 * with KERN_FAILURE if the given pmc is already reserved at either system or
2460 * task scope.
2461 *
2462 * This method consumes the given configuration object if it returns
2463 * KERN_SUCCESS. Any other return value indicates the caller
2464 * must free the config object via pmc_free_config().
2465 *
2466 * NOTE: You can reserve the same pmc for N different threads concurrently.
2467 * NOTE: This method is NOT interrupt safe.
2468 */
2469 kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config,
2470 thread_t thread, pmc_reservation_t *reservation) {
2471 if(!pmc || !config || !reservation || !thread) {
2472 return KERN_INVALID_ARGUMENT;
2473 }
2474
2475 if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
2476 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
2477 return KERN_INVALID_ARGUMENT;
2478 }
2479
2480 pmc_reservation_t resv = reservation_alloc();
2481 if(!resv) {
2482 return KERN_RESOURCE_SHORTAGE;
2483 }
2484
2485 reservation_init(resv);
2486
2487 resv->flags |= PMC_FLAG_SCOPE_THREAD;
2488 resv->thread = thread;
2489
2490 resv->config = config;
2491
2492 	if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2493 		resv->config = NULL;
 		/* Also release the reservation so it is not leaked on this path */
 		reservation_free(resv);
2494 		return KERN_FAILURE;
2495 	}
2496
2497 /* enqueue reservation in proper place */
2498 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2499 /* Prevent free of config object */
2500 resv->config = NULL;
2501
2502 reservation_free(resv);
2503 return KERN_FAILURE;
2504 }
2505
2506 perf_monitor_reservation_add(pmc->monitor);
2507
2508 *reservation = resv;
2509
2510 return KERN_SUCCESS;
2511 }
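
/*
 * Illustrative thread-scope sketch (kept out of the build): count only while
 * a particular thread runs.  As above, my_build_config() is hypothetical and
 * a successful reservation consumes the config.
 */
#if 0
static kern_return_t my_start_thread_counting(pmc_t pmc, thread_t thread,
	pmc_reservation_t *out_resv) {
	pmc_config_t config = NULL;
	kern_return_t ret;

	ret = my_build_config(pmc, &config);
	if(KERN_SUCCESS != ret) {
		return ret;
	}

	ret = pmc_reserve_thread(pmc, config, thread, out_resv);
	if(KERN_SUCCESS != ret) {
		pmc_free_config(pmc, config);
		return ret;
	}

	return pmc_reservation_start(*out_resv);
}
#endif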
2512
2513 /*
2514 * pmc_reservation_start instructs the given reservation to start counting as
2515 * soon as possible.
2516 *
2517 * NOTE: This method is interrupt safe.
2518 */
2519 kern_return_t pmc_reservation_start(pmc_reservation_t reservation) {
2520 pmc_state_t newState;
2521
2522 if(!reservation) {
2523 return KERN_INVALID_ARGUMENT;
2524 }
2525
2526 /* Move the state machine */
2527 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) {
2528 return KERN_FAILURE;
2529 }
2530
2531 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
2532 * broadcast right before it leaves
2533 */
2534 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) {
2535 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2536 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu
2537 * on every cpu that can access the PMC.
2538 */
2539 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
2540 }
2541
2542 return KERN_SUCCESS;
2543 }
2544
2545 /*
2546 * pmc_reservation_stop instructs the given reservation to stop counting as
2547 * soon as possible. When this method returns, the pmc will be marked as stopping
2548 * and subsequent calls to pmc_reservation_start will succeed. This does not mean
2549 * that the pmc hardware has _actually_ stopped running. Assuming no other changes
2550 * to the reservation state, the pmc hardware _will_ stop shortly.
2551 *
2552 */
2553 kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) {
2554 pmc_state_t newState;
2555
2556 if(!reservation) {
2557 return KERN_INVALID_ARGUMENT;
2558 }
2559
2560 /* Move the state machine */
2561 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) {
2562 return KERN_FAILURE;
2563 }
2564
2565 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
2566 * broadcast right before it leaves. Similarly, if we just moved directly to STOP, don't bother broadcasting.
2567 */
2568 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) {
2569 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2570 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
2571 * on every cpu that can access the PMC.
2572 */
2573
2574 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
2575 }
2576
2577 return KERN_SUCCESS;
2578 }
2579
2580 /*
2581 * pmc_reservation_read will read the event count associated with a reservation.
2582  * If the caller is currently executing in a context that both a) matches the
2583 * reservation's context, and b) can access the reservation's pmc directly, the
2584 * value will be read from hardware. Otherwise, this returns the reservation's
2585 * stored value.
2586 *
2587 * NOTE: This method is interrupt safe.
2588 * NOTE: When not on the interrupt stack, this method may block.
2589 */
2590 kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) {
2591 kern_return_t ret = KERN_FAILURE;
2592 uint64_t timeout;
2593 uint32_t spins;
2594
2595 if(!reservation || !value) {
2596 return KERN_INVALID_ARGUMENT;
2597 }
2598
2599 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
2600 timeout += mach_absolute_time();
2601 spins = 0;
2602 do {
2603 uint32_t state = reservation->state;
2604
2605 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
2606 /* Attempt read from hardware via drivers. */
2607
2608 assert(reservation->pmc);
2609
2610 ret = reservation->pmc->methods.get_count(reservation->pmc->object, value);
2611
2612 break;
2613 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
2614 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
2615 /* Spin */
2616 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
2617 if (++spins > PMC_SPIN_THRESHOLD) {
2618 if (mach_absolute_time() > timeout) {
2619 pmc_spin_timeout_count++;
2620 assert(0);
2621 }
2622 }
2623
2624 cpu_pause();
2625 } else {
2626 break;
2627 }
2628 } while (1);
2629
2630 /* If the direct hardware read failed (for whatever reason) */
2631 if(KERN_SUCCESS != ret) {
2632 /* Read stored value */
2633 *value = reservation->value;
2634 }
2635
2636 return KERN_SUCCESS;
2637 }
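
/*
 * Illustrative read sketch (kept out of the build): sample a reservation.
 * The value comes from hardware when the calling context allows it and from
 * the stored value otherwise, so the caller treats both cases uniformly.
 */
#if 0
static void my_sample_reservation(pmc_reservation_t resv) {
	uint64_t count = 0ULL;

	if(KERN_SUCCESS == pmc_reservation_read(resv, &count)) {
		kprintf("reservation count: %llu\n", count);
	}
}
#endif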
2638
2639 /*
2640 * pmc_reservation_write will write the event count associated with a reservation.
2641  * If the caller is currently executing in a context that both a) matches the
2642 * reservation's context, and b) can access the reservation's pmc directly, the
2643 * value will be written to hardware. Otherwise, this writes the reservation's
2644 * stored value.
2645 *
2646 * NOTE: This method is interrupt safe.
2647 * NOTE: When not on the interrupt stack, this method may block.
2648 */
2649 kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) {
2650 kern_return_t ret = KERN_FAILURE;
2651 uint64_t timeout;
2652 uint32_t spins;
2653
2654 if(!reservation) {
2655 return KERN_INVALID_ARGUMENT;
2656 }
2657
2658 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
2659 timeout += mach_absolute_time();
2660 spins = 0;
2661 do {
2662 uint32_t state = reservation->state;
2663
2664 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
2665 /* Write to hardware via drivers. */
2666 assert(reservation->pmc);
2667
2668 ret = reservation->pmc->methods.set_count(reservation->pmc->object, value);
2669 break;
2670 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
2671 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
2672 /* Spin */
2673 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
2674 if (++spins > PMC_SPIN_THRESHOLD) {
2675 if (mach_absolute_time() > timeout) {
2676 pmc_spin_timeout_count++;
2677 assert(0);
2678 }
2679 }
2680
2681 cpu_pause();
2682 } else {
2683 break;
2684 }
2685 } while (1);
2686
2687 if(KERN_SUCCESS != ret) {
2688 /* Write stored value */
2689 reservation->value = value;
2690 }
2691
2692 return KERN_SUCCESS;
2693 }
2694
2695 /*
2696 * pmc_reservation_free releases a reservation and all associated resources.
2697 *
2698 * NOTE: This method is NOT interrupt safe.
2699 */
2700 kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
2701 pmc_state_t newState;
2702
2703 if(!reservation) {
2704 return KERN_INVALID_ARGUMENT;
2705 }
2706
2707 perf_monitor_reservation_remove(reservation->pmc->monitor);
2708
2709 /* Move the state machine */
2710 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) {
2711 return KERN_FAILURE;
2712 }
2713
2714 /* If we didn't move directly to DEALLOC, help things along */
2715 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) {
2716 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2717 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
2718 * on every cpu that can access the PMC.
2719 */
2720 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
2721 }
2722
2723 /* Block until the reservation hits the <DEALLOC, 0, > state */
2724 while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) {
2725 assert_wait((event_t)reservation, THREAD_UNINT);
2726 thread_block(THREAD_CONTINUE_NULL);
2727 }
2728
2729 /* remove from queues */
2730 pmc_internal_reservation_remove(reservation);
2731
2732 /* free reservation */
2733 reservation_free(reservation);
2734
2735 return KERN_SUCCESS;
2736 }
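
/*
 * Illustrative teardown sketch (kept out of the build): stop the reservation,
 * take a final sample, and release it.  The config consumed at reserve time
 * belongs to the reservation and is not freed separately here.
 */
#if 0
static uint64_t my_finish_counting(pmc_reservation_t resv) {
	uint64_t final_count = 0ULL;

	(void)pmc_reservation_stop(resv);
	(void)pmc_reservation_read(resv, &final_count);
	(void)pmc_reservation_free(resv);

	return final_count;
}
#endif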
2737
2738 /*
2739 * pmc_idle notifies eligible monitors of impending per-CPU idle, and can be used to save state.
2740 */
2741 boolean_t pmc_idle(void) {
2742 perf_monitor_t monitor = NULL;
2743 queue_head_t *cpu_queue;
2744
2745 lck_spin_lock(&perf_monitor_queue_spin);
2746
2747 if (cpu_monitor_queues) {
2748 cpu_queue = cpu_monitor_queues[cpu_number()];
2749
2750 queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
2751 perf_monitor_methods_t *methods = &(monitor->methods);
2752 if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {
2753 methods->on_idle(monitor->object);
2754 }
2755 }
2756 }
2757
2758 lck_spin_unlock(&perf_monitor_queue_spin);
2759
2760 return TRUE;
2761 }
2762
2763 /*
2764 * pmc_idle_exit notifies eligible monitors of wake from idle; it can be used to restore state.
2765 */
2766 boolean_t pmc_idle_exit(void) {
2767 perf_monitor_t monitor = NULL;
2768 queue_head_t *cpu_queue;
2769
2770 lck_spin_lock(&perf_monitor_queue_spin);
2771
2772 if (cpu_monitor_queues) {
2773 cpu_queue = cpu_monitor_queues[cpu_number()];
2774
2775 queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
2776 perf_monitor_methods_t *methods = &(monitor->methods);
2777 if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {
2778 methods->on_idle_exit(monitor->object);
2779 }
2780 }
2781 }
2782
2783 lck_spin_unlock(&perf_monitor_queue_spin);
2784
2785 return TRUE;
2786 }
2787
2788 /*
2789 * pmc_context_switch performs all context switching necessary to save all pmc
2790 * state associated with @oldThread (and the task to which @oldThread belongs),
2791 * as well as to restore all pmc state associated with @newThread (and the task
2792 * to which @newThread belongs).
2793 *
2794 * NOTE: This method IS interrupt safe.
2795 */
2796 boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) {
2797 pmc_reservation_t resv = NULL;
2798 uint32_t cpuNum = cpu_number();
2799
2800 lck_spin_lock(&reservations_spin);
2801
2802 /* Save pmc states */
2803 if (thread_reservation_count) {
2804 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
2805 if ((oldThread == resv->thread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
2806 (void)pmc_internal_reservation_context_out(resv);
2807 }
2808 }
2809 }
2810
2811 if (task_reservation_count) {
2812 queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
2813 if ((resv->task == oldThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
2814 (void)pmc_internal_reservation_context_out(resv);
2815 }
2816 }
2817 }
2818
2819 /* Restore */
2820 if (thread_reservation_count) {
2821 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
2822 if ((resv->thread == newThread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
2823 (void)pmc_internal_reservation_context_in(resv);
2824 }
2825 }
2826 }
2827
2828 if (task_reservation_count) {
2829 queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
2830 if ((resv->task == newThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
2831 (void)pmc_internal_reservation_context_in(resv);
2832 }
2833 }
2834 }
2835
2836 lck_spin_unlock(&reservations_spin);
2837
2838 return TRUE;
2839 }
2840
2841 #else /* !CONFIG_COUNTERS */
2842
2843 #if 0
2844 #pragma mark -
2845 #pragma mark Dummy functions
2846 #endif
2847
2848 /*
2849  * When a configuration is built without the PMC KPI, the symbols are still
2850  * exported for kexts, so we need to define stub methods that simply return
2851  * failure.
2852 */
2853 kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused,
2854 perf_monitor_methods_t *methods __unused) {
2855 return KERN_FAILURE;
2856 }
2857
2858 kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) {
2859 return KERN_FAILURE;
2860 }
2861
2862 kern_return_t pmc_register(perf_monitor_object_t monitor __unused,
2863 pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) {
2864 return KERN_FAILURE;
2865 }
2866
2867 kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused,
2868 pmc_object_t pmc __unused) {
2869 return KERN_FAILURE;
2870 }
2871
2872 kern_return_t pmc_create_config(pmc_t pmc __unused,
2873 pmc_config_t *config __unused) {
2874 return KERN_FAILURE;
2875 }
2876
2877 void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) {
2878 }
2879
2880 kern_return_t pmc_config_set_value(pmc_t pmc __unused,
2881 pmc_config_t config __unused, uint8_t id __unused,
2882 uint64_t value __unused) {
2883 return KERN_FAILURE;
2884 }
2885
2886 kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused,
2887 pmc_config_t config __unused, uint64_t threshold __unused,
2888 pmc_interrupt_method_t method __unused, void *refCon __unused) {
2889 return KERN_FAILURE;
2890 }
2891
2892 kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) {
2893 return KERN_FAILURE;
2894 }
2895
2896 void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) {
2897 }
2898
2899 kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused,
2900 size_t *pmcCount __unused) {
2901 return KERN_FAILURE;
2902 }
2903
2904 const char *pmc_get_name(pmc_t pmc __unused) {
2905 return "";
2906 }
2907
2908 kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused,
2909 uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) {
2910 return KERN_FAILURE;
2911 }
2912
2913 kern_return_t pmc_reserve(pmc_t pmc __unused,
2914 pmc_config_t config __unused, pmc_reservation_t *reservation __unused) {
2915 return KERN_FAILURE;
2916 }
2917
2918 kern_return_t pmc_reserve_task(pmc_t pmc __unused,
2919 pmc_config_t config __unused, task_t task __unused,
2920 pmc_reservation_t *reservation __unused) {
2921 return KERN_FAILURE;
2922 }
2923
2924 kern_return_t pmc_reserve_thread(pmc_t pmc __unused,
2925 pmc_config_t config __unused, thread_t thread __unused,
2926 pmc_reservation_t *reservation __unused) {
2927 return KERN_FAILURE;
2928 }
2929
2930 kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) {
2931 return KERN_FAILURE;
2932 }
2933
2934 kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) {
2935 return KERN_FAILURE;
2936 }
2937
2938 kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused,
2939 uint64_t *value __unused) {
2940 return KERN_FAILURE;
2941 }
2942
2943 kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused,
2944 uint64_t value __unused) {
2945 return KERN_FAILURE;
2946 }
2947
2948 kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) {
2949 return KERN_FAILURE;
2950 }
2951
2952
2953 #endif /* !CONFIG_COUNTERS */