1/*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24#include <kern/kalloc.h>
25#include <kern/kern_types.h>
26#include <kern/locks.h>
27#include <kern/misc_protos.h>
28#include <kern/task.h>
29#include <kern/thread.h>
30#include <kern/zalloc.h>
31#include <machine/machine_cpu.h>
32
33#include <pmc/pmc.h>
34
35#include <libkern/OSAtomic.h>
36
37#if defined(__i386__) || defined(__x86_64__)
38#include <i386/mp.h>
39#endif
40
41#if defined(__ppc__)
42#include <ppc/cpu_internal.h>
43#include <ppc/machine_cpu.h>
44#endif
45
46#if CONFIG_COUNTERS
47
48/* enable for various debug logging */
49#undef DEBUG_COUNTERS
50
51typedef uint8_t pmc_state_event_t;
52
53#define PMC_STATE_EVENT_START 0
54#define PMC_STATE_EVENT_STOP 1
55#define PMC_STATE_EVENT_FREE 2
56#define PMC_STATE_EVENT_INTERRUPT 3
57#define PMC_STATE_EVENT_END_OF_INTERRUPT 4
58#define PMC_STATE_EVENT_CONTEXT_IN 5
59#define PMC_STATE_EVENT_CONTEXT_OUT 6
60#define PMC_STATE_EVENT_LOAD_FINISHED 7
61#define PMC_STATE_EVENT_STORE_FINISHED 8
62
63/* PMC spin timeouts */
64#define PMC_SPIN_THRESHOLD 10 /* Number of spins to allow before checking mach_absolute_time() */
65#define PMC_SPIN_TIMEOUT_US 10 /* Time in microseconds before the spin causes an assert */
66
67uint64_t pmc_spin_timeout_count = 0; /* Number of times where a PMC spin loop causes a timeout */
68
69#ifdef DEBUG_COUNTERS
70# include <pexpert/pexpert.h>
71# define COUNTER_DEBUG(...) \
72 do { \
73 kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
74 kprintf(__VA_ARGS__); \
75 } while(0)
76
77# define PRINT_PERF_MON(x) \
78 do { \
79 kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
80 x, x->object, x->useCount, \
81 x->methods.supports_context_switching ? \
82 x->methods.supports_context_switching(x->object) : 0); \
83 } while(0)
84
85static const char * pmc_state_state_name(pmc_state_t state) {
86 switch (PMC_STATE_STATE(state)) {
87 case PMC_STATE_STATE_INVALID:
88 return "INVALID";
89 case PMC_STATE_STATE_STOP:
90 return "STOP";
91 case PMC_STATE_STATE_CAN_RUN:
92 return "CAN_RUN";
93 case PMC_STATE_STATE_LOAD:
94 return "LOAD";
95 case PMC_STATE_STATE_RUN:
96 return "RUN";
97 case PMC_STATE_STATE_STORE:
98 return "STORE";
99 case PMC_STATE_STATE_INTERRUPT:
100 return "INTERRUPT";
101 case PMC_STATE_STATE_DEALLOC:
102 return "DEALLOC";
103 default:
104 return "UNKNOWN";
105 }
106}
107
108static const char * pmc_state_event_name(pmc_state_event_t event) {
109 switch (event) {
110 case PMC_STATE_EVENT_START:
111 return "START";
112 case PMC_STATE_EVENT_STOP:
113 return "STOP";
114 case PMC_STATE_EVENT_FREE:
115 return "FREE";
116 case PMC_STATE_EVENT_INTERRUPT:
117 return "INTERRUPT";
118 case PMC_STATE_EVENT_END_OF_INTERRUPT:
119 return "END OF INTERRUPT";
120 case PMC_STATE_EVENT_CONTEXT_IN:
121 return "CONTEXT IN";
122 case PMC_STATE_EVENT_CONTEXT_OUT:
123 return "CONTEXT OUT";
124 case PMC_STATE_EVENT_LOAD_FINISHED:
125 return "LOAD_FINISHED";
126 case PMC_STATE_EVENT_STORE_FINISHED:
127 return "STORE_FINISHED";
128 default:
129 return "UNKNOWN";
130 }
131}
132
133# define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
134# define PMC_STATE_ARGS(x) pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
135 ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
136#else
137# define COUNTER_DEBUG(...)
138# define PRINT_PERF_MON(x)
139# define PMC_STATE_FORMAT
140# define PMC_STATE_ARGS(x)
141#endif
142
143/*!struct
144 * pmc_config is the data behind a pmc_config_t.
145 * @member object A pointer to an instance of IOPerformanceCounterConfiguration
146 * @member method A pointer to a method to call to handle PMI.
147 * @member interrupt_after_value Cause a PMI after the counter counts this many
148 * events.
149 * @member refCon Passed to the @method method as the refCon argument.
150 */
151struct pmc_config {
152 pmc_config_object_t object;
153 volatile pmc_interrupt_method_t method;
154 uint64_t interrupt_after_value;
155 void *refCon;
156};
157
158/*
159 * Allocation Zones
160 *
161 * Two allocation zones - Perf zone small and Perf zone big.
162 * Each zone has associated maximums, defined below.
163 * The small zone's unit size is the largest of the smaller allocation objects
164 * (all sizes shown are for K64):
165 * perf_monitor_t - 48 bytes
166 * perf_monitor_methods_t - 28 bytes
167 * pmc_reservation_t - 48 bytes
168 * pmc_config_t - 32 bytes
169 * perf_small_zone unit size is (on K64) 48 bytes
170 * perf_small_zone max count must be max number of perf monitors, plus (max
171 * number of reservations * 2). The "*2" is because each reservation has a
172 * pmc_config_t within.
173 *
174 * The big zone's unit size is the largest of the larger allocation units:
175 * pmc_t - 144 bytes
176 * pmc_methods_t - 116 bytes
177 * perf_big_zone unit size is (on K64) 144 bytes
178 * perf_big_zone max count is the max number of PMCs we support.
179 */
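/*
 * Worked sizing example (illustrative; the operands of MAX_PERF_SMALLS below
 * are presumed to break down this way): 256 perf monitors + 8196 reservations
 * + 8196 configs (one per reservation) = 16648 small-zone elements.
 */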
180
181static zone_t perf_small_zone = NULL;
182#define MAX_PERF_SMALLS (256 + 8196 + 8196)
183#define PERF_SMALL_UNIT_SZ (MAX(MAX(sizeof(struct perf_monitor), \
184 sizeof(struct pmc_reservation)), sizeof(struct pmc_config)))
185
186static zone_t perf_big_zone = NULL;
187#define MAX_PERF_BIGS (1024)
188#define PERF_BIG_UNIT_SZ (sizeof(struct pmc))
189
190/*
191 * Locks and Lock groups
192 */
193static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL;
194static lck_grp_attr_t *pmc_lock_grp_attr;
195static lck_attr_t *pmc_lock_attr;
196
197/* PMC tracking queue locks */
198static lck_spin_t perf_monitor_queue_spin; /* protects adding and removing from queue */
199static lck_spin_t perf_counters_queue_spin; /* protects adding and removing from queue */
200
201/* Reservation tracking queues lock */
202static lck_spin_t reservations_spin;
203
204/*
205 * Tracking queues
206 *
207 * Keeps track of registered perf monitors and perf counters
208 */
209static queue_t perf_monitors_queue = NULL;
210static volatile uint32_t perf_monitors_count = 0U;
211
212static queue_t perf_counters_queue = NULL;
213static volatile uint32_t perf_counters_count = 0U;
214
215/*
216 * Reservation queues
217 *
218 * Keeps track of all system, task, and thread-level reservations (both active and
219 * inactive).
220 *
221 * We track them all here (rather than in their respective task or thread only)
222 * so that we can inspect our tracking data directly (rather than peeking at
223 * every task and thread) to determine if/when a new reservation would
224 * constitute a conflict.
225 */
226static queue_t system_reservations = NULL;
227static volatile uint32_t system_reservation_count __attribute__((aligned(4))) = 0U;
228
229static queue_t task_reservations = NULL;
230static volatile uint32_t task_reservation_count __attribute__((aligned(4))) = 0U;
231
232static queue_t thread_reservations = NULL;
233static volatile uint32_t thread_reservation_count __attribute__((aligned(4))) = 0U;
234
235
236#if XNU_KERNEL_PRIVATE
237
238/*
239 * init_pmc_locks creates and initializes all the locks, lock groups, and lock
240 * attributes required for the pmc sub-system.
241 */
242static void init_pmc_locks(void) {
243 pmc_lock_attr = lck_attr_alloc_init();
244 assert(pmc_lock_attr);
245
246 pmc_lock_grp_attr = lck_grp_attr_alloc_init();
247 assert(pmc_lock_grp_attr);
248
249 pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
250 assert(pmc_lock_grp);
251
252 lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
253 lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);
254
255 lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);
256}
257
258/*
259 * init_pmc_zones initializes the allocation zones used by the pmc subsystem
260 */
261static void init_pmc_zones(void) {
262 perf_small_zone = zinit(PERF_SMALL_UNIT_SZ,
263 MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS,
264 "pmc.small zone");
265
266 assert(perf_small_zone);
267
268 perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
269 MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS,
270 "pmc.big zone");
271
272 assert(perf_big_zone);
273}
274
275/*
276 * init_pmc_queues allocates and initializes the tracking queues for
277 * registering and reserving individual pmcs and perf monitors.
278 */
279static void init_pmc_queues(void) {
280 perf_monitors_queue = (queue_t)kalloc(sizeof(queue_t));
281 assert(perf_monitors_queue);
282
283 queue_init(perf_monitors_queue);
284
285 perf_counters_queue = (queue_t)kalloc(sizeof(queue_t));
286 assert(perf_counters_queue);
287
288 queue_init(perf_counters_queue);
289
290 system_reservations = (queue_t)kalloc(sizeof(queue_t));
291 assert(system_reservations);
292
293 queue_init(system_reservations);
294
295 task_reservations = (queue_t)kalloc(sizeof(queue_t));
296 assert(task_reservations);
297
298 queue_init(task_reservations);
299
300 thread_reservations = (queue_t)kalloc(sizeof(queue_t));
301 assert(thread_reservations);
302
303 queue_init(thread_reservations);
304}
305
306/*
307 * pmc_bootstrap brings up all the necessary infrastructure required to use the
308 * pmc sub-system.
309 */
310__private_extern__
311void pmc_bootstrap(void) {
312 /* build our alloc zones */
313 init_pmc_zones();
314
315 /* build the locks */
316 init_pmc_locks();
317
318 /* build our tracking queues */
319 init_pmc_queues();
320}
321
322#endif /* XNU_KERNEL_PRIVATE */
323
324/*
325 * Perf Monitor Internals
326 */
327
328static perf_monitor_t perf_monitor_alloc(void) {
329 /* perf monitors come from the perf small zone */
330 return (perf_monitor_t)zalloc(perf_small_zone);
331}
332
333static void perf_monitor_free(void *pm) {
334 zfree(perf_small_zone, pm);
335}
336
337static void perf_monitor_init(perf_monitor_t pm) {
338 assert(pm);
339
340 pm->object = NULL;
341
342 bzero(&(pm->methods), sizeof(perf_monitor_methods_t));
343
344 pm->useCount = 1; /* initial retain count of 1, for caller */
345
346 pm->link.next = pm->link.prev = (queue_entry_t)NULL;
347}
348
349/*
350 * perf_monitor_dequeue removes the given perf_monitor_t from the
351 * perf_monitor_queue, thereby unregistering it with the system.
352 */
353static void perf_monitor_dequeue(perf_monitor_t pm) {
354 lck_spin_lock(&perf_monitor_queue_spin);
355
356 /*
357 * remove the @pm object from the @perf_monitor_queue queue (it is of type
358 * <perf_monitor_t> and has a field called @link that is the queue_link_t
359 */
360 queue_remove(perf_monitors_queue, pm, perf_monitor_t, link);
361
362 perf_monitors_count--;
363
364 lck_spin_unlock(&perf_monitor_queue_spin);
365}
366
367/*
368 * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue,
369 * thereby registering it for use with the system.
370 */
371static void perf_monitor_enqueue(perf_monitor_t pm) {
372 lck_spin_lock(&perf_monitor_queue_spin);
373
374 queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
375
376 perf_monitors_count++;
377
378 lck_spin_unlock(&perf_monitor_queue_spin);
379}
380
381/*
382 * perf_monitor_reference increments the reference count for the given
383 * perf_monitor_t.
384 */
385static void perf_monitor_reference(perf_monitor_t pm) {
386 assert(pm);
387
388 OSIncrementAtomic(&(pm->useCount));
389}
390
391/*
392 * perf_monitor_deallocate decrements the reference count for the given
393 * perf_monitor_t. If the reference count hits 0, the object is released back
394 * to the perf_small_zone via a call to perf_monitor_free().
395 */
396static void perf_monitor_deallocate(perf_monitor_t pm) {
397 assert(pm);
398
399 /* If we just removed the last reference count */
400 if(1 == OSDecrementAtomic(&(pm->useCount))) {
401 /* Free the object */
402 perf_monitor_free(pm);
403 }
404}
405
406/*
407 * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the
408 * given C++ object pointer that was used when registering with the subsystem.
409 *
410 * If found, the method returns the perf_monitor_t with an extra
411 * reference placed on the object; if the monitor is not found, it
412 * returns NULL.
413 *
414 * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after
415 * calling perf_monitor_find.
416 */
417static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
418 assert(monitor);
419 perf_monitor_t element = NULL;
420 perf_monitor_t found = NULL;
421
422 lck_spin_lock(&perf_monitor_queue_spin);
423
424 queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
425 if(element && element->object == monitor) {
426 /* We found it - reference the object. */
427 perf_monitor_reference(element);
428 found = element;
429 break;
430 }
431 }
432
433 lck_spin_unlock(&perf_monitor_queue_spin);
434
435 return found;
436}
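/*
 * Illustrative usage sketch (not part of the registered KPI): a caller that
 * looks up a monitor with perf_monitor_find() must drop the extra reference
 * when it is done; "some_monitor_object" below is a placeholder:
 *
 *     perf_monitor_t pm = perf_monitor_find(some_monitor_object);
 *     if (pm) {
 *         ... use pm ...
 *         perf_monitor_deallocate(pm);  // drop the reference taken by find
 *     }
 */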
437
438/*
439 * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
440 * associated with.
441 */
442static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
443 assert(pm);
444 assert(pmc);
445
446 /* Today, we merely add a reference count now that a new pmc is attached */
447 perf_monitor_reference(pm);
448}
449
450/*
451 * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf
452 * monitor it is associated with.
453 */
454static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
455 assert(pm);
456 assert(pmc);
457
458 /* Today, we merely remove a reference count now that the pmc is detached */
459 perf_monitor_deallocate(pm);
460}
461
462/*
463 * Perf Counter internals
464 */
465
466static pmc_t pmc_alloc(void) {
467 return (pmc_t)zalloc(perf_big_zone);
468}
469
470static void pmc_free(void *pmc) {
471 zfree(perf_big_zone, pmc);
472}
473
474/*
475 * pmc_init initializes a newly allocated pmc_t
476 */
477static void pmc_init(pmc_t pmc) {
478 assert(pmc);
479
480 pmc->object = NULL;
481 pmc->monitor = NULL;
482
483 bzero(&pmc->methods, sizeof(pmc_methods_t));
484
485 /* One reference for the caller */
486 pmc->useCount = 1;
487}
488
489/*
490 * pmc_reference increments the reference count of the given pmc_t
491 */
492static void pmc_reference(pmc_t pmc) {
493 assert(pmc);
494
495 OSIncrementAtomic(&(pmc->useCount));
496}
497
498/*
499 * pmc_deallocate decrements the reference count of the given pmc_t. If the
500 * reference count hits zero, the given pmc_t is deallocated and released back
501 * to the allocation zone.
502 */
503static void pmc_deallocate(pmc_t pmc) {
504 assert(pmc);
505
506 /* If we just removed the last reference count */
507 if(1 == OSDecrementAtomic(&(pmc->useCount))) {
508 /* Free the pmc */
509 pmc_free(pmc);
510 }
511}
512
513/*
514 * pmc_dequeue removes the given, newly *un*registered pmc from the
515 * perf_counters_queue.
516 */
517static void pmc_dequeue(pmc_t pmc) {
518 lck_spin_lock(&perf_counters_queue_spin);
519
520 queue_remove(perf_counters_queue, pmc, pmc_t, link);
521
522 perf_counters_count--;
523
524 lck_spin_unlock(&perf_counters_queue_spin);
525}
526
527/*
528 * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue
529 */
530static void pmc_enqueue(pmc_t pmc) {
531 lck_spin_lock(&perf_counters_queue_spin);
532
533 queue_enter(perf_counters_queue, pmc, pmc_t, link);
534
535 perf_counters_count++;
536
537 lck_spin_unlock(&perf_counters_queue_spin);
538}
539
540/*
541 * pmc_find attempts to locate a pmc_t that was registered with the given
542 * pmc_object_t pointer. If found, it returns the pmc_t with an extra reference
543 * which must be dropped by the caller by calling pmc_deallocate().
544 */
545static pmc_t pmc_find(pmc_object_t object) {
546 assert(object);
547
548 lck_spin_lock(&perf_counters_queue_spin);
549
550 pmc_t element = NULL;
551 pmc_t found = NULL;
552
553 queue_iterate(perf_counters_queue, element, pmc_t, link) {
554 if(element && element->object == object) {
555 pmc_reference(element);
556
557 found = element;
558 break;
559 }
560 }
561
562 lck_spin_unlock(&perf_counters_queue_spin);
563
564 return found;
565}
566
567/*
568 * Config internals
569 */
570
571/* Allocate a pmc_config_t */
572static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) {
573 return (pmc_config_t)zalloc(perf_small_zone);
574}
575
576/* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */
577static void pmc_config_free(pmc_t pmc, pmc_config_t config) {
578 assert(pmc);
579 assert(config);
580
581 if(config->object) {
582 pmc->methods.free_config(pmc->object, config->object);
583 config->object = NULL;
584 }
585
586 zfree(perf_small_zone, config);
587}
588
589static kern_return_t pmc_open(pmc_t pmc) {
590 assert(pmc);
591 assert(pmc->object);
592 assert(pmc->open_object);
593
594 return pmc->methods.open(pmc->object, pmc->open_object);
595}
596
597static kern_return_t pmc_close(pmc_t pmc) {
598 assert(pmc);
599 assert(pmc->object);
600 assert(pmc->open_object);
601
602 return pmc->methods.close(pmc->object, pmc->open_object);
603}
604
605/*
606 * Reservation Internals
607 */
608
609static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc);
610static void pmc_internal_reservation_store(pmc_reservation_t reservation);
611static void pmc_internal_reservation_load(pmc_reservation_t reservation);
612
613static pmc_reservation_t reservation_alloc(void) {
614 /* pmc reservations come from the perf small zone */
615 return (pmc_reservation_t)zalloc(perf_small_zone);
616}
617
618/*
619 * reservation_free deallocates and releases all resources associated with the
620 * given pmc_reservation_t. This includes freeing the config used to create the
621 * reservation, decrementing the reference count for the pmc used to create the
622 * reservation, and deallocating the reservation's memory.
623 */
624static void reservation_free(pmc_reservation_t resv) {
625 /* Free config */
626 if(resv->config) {
627 assert(resv->pmc);
628
629 pmc_free_config(resv->pmc, resv->config);
630
631 resv->config = NULL;
632 }
633
634 /* release PMC */
635 (void)pmc_internal_reservation_set_pmc(resv, NULL);
636
637 /* Free reservation */
638 zfree(perf_small_zone, resv);
639}
640
641/*
642 * reservation_init initializes a newly created reservation.
643 */
644static void reservation_init(pmc_reservation_t resv) {
645 assert(resv);
646
647 resv->pmc = NULL;
648 resv->config = NULL;
649 resv->value = 0ULL;
650
651 resv->flags = 0U;
652 resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
653 resv->active_last_context_in = 0U;
654
655 /*
656 * Since this member is a union, we only need to set either the task
657 * or thread to NULL.
658 */
659 resv->task = TASK_NULL;
660}
661
662/*
663 * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If
664 * there was one set already, it is deallocated (reference is dropped) before
665 * the new one is set. This method increases the reference count of the given
666 * pmc_t.
667 *
668 * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of
669 * dropping the reference on any previously set pmc, and setting the reservation
670 * to having no pmc set.
671 */
672static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) {
673 assert(resv);
674
675 if(resv->pmc) {
676 (void)pmc_close(resv->pmc);
677 pmc_deallocate(resv->pmc);
678 resv->pmc = NULL;
679 }
680
681 resv->pmc = pmc;
682
683 if(resv->pmc) {
684 pmc_reference(resv->pmc);
685 if(KERN_SUCCESS != pmc_open(resv->pmc)) {
686 pmc_deallocate(resv->pmc);
687 resv->pmc = NULL;
688
689 return KERN_FAILURE;
690 }
691 }
692
693 return KERN_SUCCESS;
694}
695
696/*
697 * Used to place a reservation into one of the system, task, or thread queues.
698 * Assumes the queue's spin lock is already held.
699 */
700static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) {
701 assert(queue);
702 assert(resv);
703
704 queue_enter(queue, resv, pmc_reservation_t, link);
705}
706
707static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) {
708 assert(queue);
709 assert(resv);
710
711 queue_remove(queue, resv, pmc_reservation_t, link);
712}
713
714/* Returns TRUE if the reservation applies to the current execution context */
715static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) {
716 boolean_t ret = FALSE;
717 assert(resv);
718
719 if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) {
720 ret = TRUE;
721 } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) {
722 if(current_task() == resv->task) {
723 ret = TRUE;
724 }
725 } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) {
726 if(current_thread() == resv->thread) {
727 ret = TRUE;
728 }
729 }
730
731 return ret;
732}
733
734/*
735 * pmc_accessible_core_count returns the number of logical cores that can access
736 * a given @pmc. 0 means every core in the system.
737 */
738static uint32_t pmc_accessible_core_count(pmc_t pmc) {
739 assert(pmc);
740
741 uint32_t *cores = NULL;
742 size_t coreCt = 0UL;
743
744 if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object,
745 &cores, &coreCt)) {
746 coreCt = 0U;
747 }
748
749 return (uint32_t)coreCt;
750}
751
752/* spin lock for the queue must already be held */
753/*
754 * This method checks whether the given queue already contains a reservation
755 * for the same PMC that would conflict with the incoming one (taking scope
756 * and the owning task/thread into account). Returns TRUE on a conflict.
757 */
758static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t
759resv) {
760 assert(queue);
761 assert(resv);
762
763 boolean_t ret = FALSE;
764 pmc_reservation_t tmp = NULL;
765
766 queue_iterate(queue, tmp, pmc_reservation_t, link) {
767 if(tmp) {
768 if(tmp->pmc == resv->pmc) {
769 /* PMC matches - make sure scope matches first */
770 switch(PMC_FLAG_SCOPE(tmp->flags)) {
771 case PMC_FLAG_SCOPE_SYSTEM:
772 /*
773 * Found a reservation in system queue with same pmc - always a
774 * conflict.
775 */
776 ret = TRUE;
777 break;
778 case PMC_FLAG_SCOPE_THREAD:
779 /*
780 * Found one in thread queue with the same PMC as the
781 * argument. Only a conflict if argument scope isn't
782 * thread or system, or the threads match.
783 */
784 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) ||
785 (tmp->thread == resv->thread);
786
787 if(!ret) {
788 /*
789 * so far, no conflict - check that the pmc that is
790 * being reserved isn't accessible from more than
791 * one core, if it is, we need to say it's already
792 * taken.
793 */
794 if(1 != pmc_accessible_core_count(tmp->pmc)) {
795 ret = TRUE;
796 }
797 }
798 break;
799 case PMC_FLAG_SCOPE_TASK:
800 /*
801 * Follow similar semantics for task scope.
802 */
803
804 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
805 (tmp->task == resv->task);
806 if(!ret) {
807 /*
808 * so far, no conflict - check that the pmc that is
809 * being reserved isn't accessible from more than
810 * one core, if it is, we need to say it's already
811 * taken.
812 */
813 if(1 != pmc_accessible_core_count(tmp->pmc)) {
814 ret = TRUE;
815 }
816 }
817
818 break;
819 }
820
821 if(ret) break;
822 }
823 }
824 }
825
826 return ret;
827}
828
829/*
830 * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be
831 * added to its target queue without creating conflicts (target queue is
832 * determined by the reservation's scope flags). Further, this method returns
833 * FALSE if any level contains a reservation for a PMC that can be accessed from
834 * more than one core, and the given reservation also wants the same PMC.
835 */
836static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) {
837 assert(resv);
838 boolean_t ret = TRUE;
839
840 if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) ||
841 pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) ||
842 pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) {
843 ret = FALSE;
844 }
845
846 return ret;
847}
848
849static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) {
850 assert(thread);
851
852 /* See if this thread needs its PMC flag set */
853 pmc_reservation_t tmp = NULL;
854
855 if(!newFlag) {
856 /*
857 * If the parent task just dropped its reservation, iterate the thread
858 * reservations to see if we need to keep the pmc flag set for the given
859 * thread or not.
860 */
861 lck_spin_lock(&reservations_spin);
862
863 queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) {
864 if(tmp->thread == thread) {
865 newFlag = TRUE;
866 break;
867 }
868 }
869
870 lck_spin_unlock(&reservations_spin);
871 }
872
873 if(newFlag) {
874 OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud);
875 } else {
876 OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud);
877 }
878}
879
880/*
881 * This operation is (worst case) O(N*M), where N is the number of threads in the
882 * given task, and M is the number of thread reservations in our system.
883 */
884static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
885 assert(task);
886 thread_t thread = NULL;
887
888 if(newFlag) {
889 OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
890 } else {
891 OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
892 }
893
894 task_lock(task);
895
896 queue_iterate(&task->threads, thread, thread_t, task_threads) {
897 /* propagate the task's mask down to each thread */
898 pmc_internal_update_thread_flag(thread, newFlag);
899 }
900
901 task_unlock(task);
902}
903
904/*
905 * pmc_internal_reservation_add adds a reservation to the global tracking queues after
906 * ensuring there are no reservation conflicts. To do this, it holds the
907 * reservations spin lock across both the check and the insert (so that no
908 * other core can add a conflicting reservation for the same pmc in between).
909 */
910static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
911 assert(resv);
912
913 boolean_t ret = FALSE;
914
915 /* one spin lock protects all three reservation queues */
916 lck_spin_lock(&reservations_spin);
917
918 /* Check if the reservation can be added without conflicts */
919 if(pmc_internal_reservation_validate_for_pmc(resv)) {
920 ret = TRUE;
921 }
922
923 if(ret) {
924 /* add reservation to appropriate scope */
925 switch(PMC_FLAG_SCOPE(resv->flags)) {
926
927 /* System-wide counter */
928 case PMC_FLAG_SCOPE_SYSTEM:
929 /* Simply add it to the system queue */
930 pmc_internal_reservation_enqueue(system_reservations, resv);
931
932 lck_spin_unlock(&reservations_spin);
933
934 break;
935
936 /* Task-switched counter */
937 case PMC_FLAG_SCOPE_TASK:
938 assert(resv->task);
939
940 /* Enqueue it in our local queue for tracking; the task flag is updated below */
941 pmc_internal_reservation_enqueue(task_reservations, resv);
942
943 lck_spin_unlock(&reservations_spin);
944
945 /* update the task mask, and propagate it to existing threads */
946 pmc_internal_update_task_flag(resv->task, TRUE);
947 break;
948
949 /* Thread-switched counter */
950 case PMC_FLAG_SCOPE_THREAD:
951 assert(resv->thread);
952
953 /*
954 * Works the same as a task-switched counter, only at
955 * thread-scope
956 */
957
958 pmc_internal_reservation_enqueue(thread_reservations, resv);
959
960 lck_spin_unlock(&reservations_spin);
961
962 pmc_internal_update_thread_flag(resv->thread, TRUE);
963 break;
964 }
965 } else {
966 lck_spin_unlock(&reservations_spin);
967 }
968
969 return ret;
970}
971
972static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
973 uint32_t * cores;
974 size_t core_cnt;
975
976 /* Get the list of accessible cores */
977 if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
978 boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);
979
980 /* Fast case: the PMC is only accessible from one core and we happen to be on it */
981 if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
982 action_func(reservation);
983 } else {
984 /* Call action_func on every accessible core */
985#if defined(__i386__) || defined(__x86_64__)
986 size_t ii;
987 cpumask_t mask = 0;
988
989 /* Build a mask for the accessible cores */
990 if (core_cnt > 0) {
991 for (ii = 0; ii < core_cnt; ii++) {
992 mask |= cpu_to_cpumask(cores[ii]);
993 }
994 } else {
995 /* core_cnt = 0 really means all cpus */
996 mask = CPUMASK_ALL;
997 }
998
999 /* Have each accessible core run action_func asynchronously. */
1000 mp_cpus_call(mask, ASYNC, action_func, reservation);
1001#elif defined(__ppc__)
1002 size_t ii;
1003
1004 if (core_cnt > 0) {
1005 for (ii = 0; ii < core_cnt; ii++) {
1006 if (cores[ii] == (uint32_t)cpu_number()) {
1007 action_func(reservation);
1008 } else {
1009 cpu_signal(cores[ii], SIGPcall, (uint32_t)action_func, (uint32_t)reservation);
1010 }
1011 }
1012 } else {
1013 uint32_t sync;
1014 cpu_broadcast(&sync, (void (*)(uint32_t))action_func, (uint32_t)reservation);
1015 action_func(reservation);
1016 }
1017#else
1018#error pmc_internal_reservation_broadcast needs an inter-processor method invocation mechanism for this architecture
1019#endif
1020 }
1021
1022 ml_set_interrupts_enabled(intrs_enabled);
1023 }
1024
1025}
1026
1027/*
1028 * pmc_internal_reservation_remove removes the given reservation from the appropriate
1029 * reservation queue according to its scope.
1030 *
1031 * NOTE: The scope flag must have been set for this method to function.
1032 */
1033static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
1034 assert(resv);
1035
1036 /*
1037 * Due to the way the macros are written, we can't just blindly queue-remove
1038 * the reservation without knowing which queue it's in. We figure this out
1039 * using the reservation's scope flags.
1040 */
1041
1042 switch(PMC_FLAG_SCOPE(resv->flags)) {
1043
1044 case PMC_FLAG_SCOPE_SYSTEM:
1045 lck_spin_lock(&reservations_spin);
1046 pmc_internal_reservation_dequeue(system_reservations, resv);
1047 lck_spin_unlock(&reservations_spin);
1048 break;
1049
1050 case PMC_FLAG_SCOPE_TASK:
1051
1052 /* Lock the global spin lock */
1053 lck_spin_lock(&reservations_spin);
1054
1055 /* remove from the global queue */
1056 pmc_internal_reservation_dequeue(task_reservations, resv);
1057
1058 /* unlock the global */
1059 lck_spin_unlock(&reservations_spin);
1060
1061 /* Recalculate task's counter mask */
1062 pmc_internal_update_task_flag(resv->task, FALSE);
1063 break;
1064
1065 case PMC_FLAG_SCOPE_THREAD:
1066 lck_spin_lock(&reservations_spin);
1067
1068 pmc_internal_reservation_dequeue(thread_reservations, resv);
1069
1070 lck_spin_unlock(&reservations_spin);
1071
1072 /* recalculate the thread's counter mask */
1073 pmc_internal_update_thread_flag(resv->thread, FALSE);
1074
1075 break;
1076 }
1077}
1078
1079/* Reservation State Machine
1080 *
1081 * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a
1082 * set of 9 events to provide MP-safe bookkeeping and control flow. The 3-tuple is comprised
1083 * of a state, a count of active contexts, and a set of modifier flags. A state machine defines
1084 * the possible transitions at each event point given the current 3-tuple. Atomicity is handled
1085 * by reading the current 3-tuple, applying the transformations indicated by the state machine
1086 * and then attempting to OSCompareAndSwap the transformed value. If the OSCompareAndSwap fails,
1087 * the process is repeated until either the OSCompareAndSwap succeeds or not valid transitions are
1088 * available.
1089 *
1090 * The state machine is described using tuple notation for the current state and a related notation
1091 * for describing the transformations. For conciseness, the flag and state names are abbreviated as
1092 * follows:
1093 *
1094 * states:
1095 * S = STOP
1096 * CR = CAN_RUN
1097 * L = LOAD
1098 * R = RUN
1099 * ST = STORE
1100 * I = INTERRUPT
1101 * D = DEALLOC
1102 *
1103 * flags:
1104 *
1105 * S = STOPPING
1106 * D = DEALLOCING
1107 * I = INTERRUPTING
1108 *
1109 * The tuple notation is formed from the following pattern:
1110 *
1111 * tuple = < state, active-context-count, flags >
1112 * state = S | CR | L | R | ST | I | D
1113 * active-context-count = 0 | >0 | 1 | >1
1114 * flags = flags flag | blank
1115 * flag = S | D | I
1116 *
1117 * The transform notation is similar, but only describes the modifications made to the current state.
1118 * The notation is formed from the following pattern:
1119 *
1120 * transform = < state, active-context-count, flags >
1121 * state = S | CR | L | R | ST | I | D
1122 * active-context-count = + | - | blank
1123 * flags = flags flag | flags !flag | blank
1124 * flag = S | D | I
1125 *
1126 * And now for the state machine:
1127 * State Start Stop Free Interrupt End Interrupt Context In Context Out Load Finished Store Finished
1128 * <CR, 0, > <S, , > <D, , > <L, +, >
1129 * <D, 0, >
1130 * <D, 1, D> < , -, !D>
1131 * <D, >1, D> < , -, >
1132 * <I, 0, D> <D, , !D>
1133 * <I, 0, S> < , , !S> < , , !SD> <S, , !S>
1134 * <I, 0, > < , , S> < , , D> <CR, , >
1135 * <L, 1, D> <ST, -, >
1136 * <L, 1, ID> <ST, -, >
1137 * <L, 1, IS> < , , !SD> <ST, -, >
1138 * <L, 1, S> < , , !S> < , , !SD> <ST, -, >
1139 * <L, 1, > < , , S> < , , D> < , , IS> < , +, > <R, , >
1140 * <L, >1, D> < , -, > <R, -, >
1141 * <L, >1, ID> < , -, > <R, -, >
1142 * <L, >1, IS> < , , !SD> < , -, > <R, -, >
1143 * <L, >1, S> < , , !S> < , , !SD> < , -, > <R, -, >
1144 * <L, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, > <R, , >
1145 * <R, 1, D> <ST, -, >
1146 * <R, 1, ID> <ST, -, >
1147 * <R, 1, IS> < , , !SD> <ST, -, >
1148 * <R, 1, S> < , , !S> < , , !SD> <ST, -, >
1149 * <R, 1, > < , , S> < , , D> < , , IS> < , +, > <ST, -, >
1150 * <R, >1, D> < , -, >
1151 * <R, >1, ID> < , -, >
1152 * <R, >1, IS> < , , !SD> < , -, >
1153 * <R, >1, S> < , , !S> < , , !SD> < , -, >
1154 * <R, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, >
1155 * <S, 0, > <CR, , > <D, , >
1156 * <S, 1, ID> <I, -, !I>
1157 * <S, 1, IS> < , , !SD> <I, -, !I>
1158 * <S, 1, S> < , , !S> <D, , !SD> < , -, !S>
1159 * <S, 1, > < , , S> <D, , D> <L, +, > <CR, -, >
1160 * <S, >1, ID> < , -, >
1161 * <S, >1, IS> < , , !SD> < , -, >
1162 * <S, >1, S> < , , !S> <D, , !SD> < , -, >
1163 * <S, >1, > < , , S> <D, , D> <L, +, > < , -, >
1164 * <ST, 0, D> <D, , !D>
1165 * <ST, 0, ID> <I, , !I>
1166 * <ST, 0, IS> < , , !SD> <I, , !I>
1167 * <ST, 0, S> < , , !S> < , , !SD> <S, , !S>
1168 * <ST, 0, > < , , S> < , , D> < , , IS> < , +, > <CR, , >
1169 * <ST, >0, D> < , -, > <D, , >
1170 * <ST, >0, ID> < , -, > <S, , >
1171 * <ST, >0, IS> < , , !SD> < , -, > <S, , >
1172 * <ST, >0, S> < , , !S> < , , !SD> < , -, > <S, , >
1173 * <ST, >0, > < , , S> < , , D> < , , IS> < , +, > < , -, > <L, , >
1174 */
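/*
 * Worked example of reading the table above: a reservation in <S, 1, >
 * (STOP, one active context, no flags) that receives a Context Out event
 * applies the transform <CR, -, >, i.e. it moves to CAN_RUN and decrements
 * the active context count, ending in <CR, 0, >. This corresponds to the
 * PMC_STATE_EVENT_CONTEXT_OUT / PMC_STATE_STATE_STOP case handled by
 * pmc_internal_reservation_next_state() below.
 */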
1175
1176static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) {
1177 uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0);
1178
1179 switch (event) {
1180 case PMC_STATE_EVENT_START:
1181 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1182 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1183 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1184 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1185 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1186 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1187 new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING);
1188 break;
1189 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1190 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1191 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1192 }
1193 break;
1194 }
1195 break;
1196 case PMC_STATE_EVENT_STOP:
1197 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1198 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1199 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1200 break;
1201 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1202 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1203 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1204 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1205 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
1206 break;
1207 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1208 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1209 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
1210 }
1211 break;
1212 }
1213 break;
1214 case PMC_STATE_EVENT_FREE:
1215 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1216 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1217 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1218 break;
1219 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1220 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1221 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1222 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1223 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1224 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1225 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1226 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1227 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
1228 break;
1229 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1230 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1231 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1232 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1233 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
1234 break;
1235 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1236 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
1237 break;
1238 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1239 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1240 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
1241 } else {
1242 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1243 }
1244 break;
1245 }
1246 break;
1247 case PMC_STATE_EVENT_INTERRUPT:
1248 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1249 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1250 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1251 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1252 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0);
1253 break;
1254 }
1255 break;
1256 case PMC_STATE_EVENT_END_OF_INTERRUPT:
1257 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1258 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING):
1259 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
1260 break;
1261 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1262 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
1263 break;
1264 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1265 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1266 break;
1267 }
1268 break;
1269 case PMC_STATE_EVENT_CONTEXT_IN:
1270 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1271 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1272 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
1273 break;
1274 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1275 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1276 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1277 new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0);
1278 break;
1279 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1280 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1281 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
1282 }
1283 break;
1284 }
1285 break;
1286 case PMC_STATE_EVENT_CONTEXT_OUT:
1287 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1288 case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING):
1289 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1290 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING);
1291 } else {
1292 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1293 }
1294 break;
1295 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
1296 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1297 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1298 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1299 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1300 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1301 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1302 }
1303 break;
1304 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING):
1305 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1306 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1307 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1308 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1309 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1310 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
1311 } else {
1312 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1313 }
1314 break;
1315 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1316 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1317 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1318 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING);
1319 } else {
1320 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1321 }
1322 break;
1323 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1324 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1325 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING);
1326 } else {
1327 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1328 }
1329 break;
1330 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1331 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1332 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1333 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0);
1334 } else {
1335 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1336 }
1337 }
1338 break;
1339 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
1340 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1341 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1342 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1343 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1344 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1345 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1346 }
1347 break;
1348 }
1349 break;
1350 case PMC_STATE_EVENT_LOAD_FINISHED:
1351 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1352 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
1353 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1354 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1355 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1356 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1357 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0);
1358 } else {
1359 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
1360 }
1361 break;
1362 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1363 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0);
1364 break;
1365 }
1366 break;
1367 case PMC_STATE_EVENT_STORE_FINISHED:
1368 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1369 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
1370 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1371 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
1372 } else {
1373 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1374 }
1375 break;
1376 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1377 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1378 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1379 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING);
1380 } else {
1381 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1382 }
1383 break;
1384 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1385 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1386 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
1387 } else {
1388 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1389 }
1390 break;
1391 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1392 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1393 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1394 } else {
1395 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0);
1396 }
1397 break;
1398 }
1399 break;
1400 }
1401
1402 return new_state;
1403}
1404
1405static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) {
1406 pmc_state_t oldState;
1407 pmc_state_t newState;
1408
1409 assert(reservation);
1410
1411 /* Determine what state change, if any, we need to do. Keep trying until either we succeed in making a transition
1412 * or there is no valid move.
1413 */
1414 do {
1415 oldState = reservation->state;
1416 newState = pmc_internal_reservation_next_state(oldState, event);
1417 } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state)));
1418
1419 if (newState != PMC_STATE_INVALID) {
1420 COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event));
1421 } else {
1422 COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event));
1423 }
1424
1425 if (old_state_out != NULL) {
1426 *old_state_out = oldState;
1427 }
1428
1429 return newState;
1430}
1431
1432static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) {
1433 assert(reservation);
1434 pmc_state_t newState;
1435 pmc_state_t oldState;
1436
1437 /* Clear the flag that says this reservation was active when this cpu did its last context in */
1438 OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in));
1439
1440 /* Move the state machine */
1441 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) {
1442 return;
1443 }
1444
1445 /* Do any actions required based on the state change */
1446 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) {
1447 /* Just moved into STORE, so store the reservation. */
1448 pmc_internal_reservation_store(reservation);
1449 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1450 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1451 thread_wakeup((event_t)reservation);
1452 }
1453
1454}
1455
1456static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) {
1457 assert(reservation);
1458 pmc_state_t oldState;
1459 pmc_state_t newState;
1460
1461 /* Move the state machine */
1462 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) {
1463 return;
1464 }
1465
1466 /* Mark that the reservation was active when this cpu did its last context in */
1467 OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
1468
1469 /* Do any actions required based on the state change */
1470 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) {
1471 /* Just moved into LOAD, so load the reservation. */
1472 pmc_internal_reservation_load(reservation);
1473 }
1474
1475}
1476
1477static void pmc_internal_reservation_store(pmc_reservation_t reservation) {
1478 assert(reservation);
1479 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE);
1480
1481 assert(reservation->pmc);
1482 assert(reservation->config);
1483
1484 pmc_state_t newState;
1485 kern_return_t ret = KERN_SUCCESS;
1486
1487 pmc_t store_pmc = reservation->pmc;
1488 pmc_object_t store_pmc_obj = store_pmc->object;
1489 perf_monitor_t store_pm = store_pmc->monitor;
1490
1491 /*
1492 * Instruct the Perf Monitor that contains this counter to turn
1493 * off the global disable for this counter.
1494 */
1495 ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1);
1496 if(KERN_SUCCESS != ret) {
1497 COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret);
1498 return;
1499 }
1500
1501 /* Instruct the counter to disable itself */
1502 ret = store_pmc->methods.disable(store_pmc_obj);
1503 if(KERN_SUCCESS != ret) {
1504 COUNTER_DEBUG(" [error] disable: 0x%x\n", ret);
1505 }
1506
1507 /*
1508 * At this point, we're off the hardware, so we don't have to
1509 * set_on_hardware(TRUE) if anything fails from here on.
1510 */
1511
1512 /* store the counter value into the reservation's stored count */
1513 ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value);
1514 if(KERN_SUCCESS != ret) {
1515 COUNTER_DEBUG(" [error] get_count: 0x%x\n", ret);
1516 return;
1517 }
1518
1519 /* Advance the state machine now that the STORE is finished */
1520 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) {
1521 return;
1522 }
1523
1524 /* Do any actions required based on the state change */
1525 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) {
1526 /* Just moved into LOAD, so load the reservation. */
1527 pmc_internal_reservation_load(reservation);
1528 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1529 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1530 thread_wakeup((event_t)reservation);
1531 }
1532
1533}
1534
1535static void pmc_internal_reservation_load(pmc_reservation_t reservation) {
1536 assert(reservation);
1537 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD);
1538
1539 pmc_state_t newState;
1540 kern_return_t ret = KERN_SUCCESS;
1541
1542 assert(reservation->pmc);
1543 assert(reservation->config);
1544
1545 pmc_t load_pmc = reservation->pmc;
1546 pmc_object_t load_pmc_obj = load_pmc->object;
1547 perf_monitor_t load_pm = load_pmc->monitor;
1548
1549 /* Set the control register up with the stored configuration */
1550 ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object);
1551 if(KERN_SUCCESS != ret) {
1552 COUNTER_DEBUG(" [error] set_config: 0x%x\n", ret);
1553 return;
1554 }
1555
1556 /* load the counter value */
1557 ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value);
1558 if(KERN_SUCCESS != ret) {
1559 COUNTER_DEBUG(" [error] set_count: 0x%x\n", ret);
1560 return;
1561 }
1562
1563 /* Locally enable the counter */
1564 ret = load_pmc->methods.enable(load_pmc_obj);
1565 if(KERN_SUCCESS != ret) {
1566 COUNTER_DEBUG(" [error] enable: 0x%x\n", ret);
1567 return;
1568 }
1569
1570 /*
1571 * Instruct the Perf Monitor containing the pmc to enable the
1572 * counter.
1573 */
1574 ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1);
1575 if(KERN_SUCCESS != ret) {
1576 COUNTER_DEBUG(" [error] enable_counters: 0x%x\n", ret);
1577 /* not on the hardware. */
1578 return;
1579 }
1580
1581 /* Advance the state machine now that the LOAD is finished */
1582 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) {
1583 return;
1584 }
1585
1586 /* Do any actions required based on the state change */
1587 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) {
1588 /* Just moved into STORE, so store the reservation. */
1589 pmc_internal_reservation_store(reservation);
1590 }
1591
1592}
1593
1594static void pmc_internal_reservation_start_cpu(void * arg) {
1595 pmc_reservation_t reservation = (pmc_reservation_t)arg;
1596
1597 assert(reservation);
1598
1599 if (pmc_internal_reservation_matches_context(reservation)) {
1600 /* We are in context, but the reservation may have already had the context_in method run. Attempt
1601 * to set this cpu's bit in the active_last_context_in mask. If we set it, call context_in.
1602 */
1603 uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
1604
1605 if ((oldMask & (1U << cpu_number())) == 0) {
1606 COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number());
1607
1608 pmc_internal_reservation_context_in(reservation);
1609 }
1610 }
1611}
1612
1613static void pmc_internal_reservation_stop_cpu(void * arg) {
1614 pmc_reservation_t reservation = (pmc_reservation_t)arg;
1615
1616 assert(reservation);
1617
1618 if (pmc_internal_reservation_matches_context(reservation)) {
1619 COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number());
1620
1621 pmc_internal_reservation_context_out(reservation);
1622 }
1623}
1624
1625/*!fn
1626 * pmc_reservation_interrupt is called when a PMC reservation which was setup
1627 * with an interrupt threshold counts the requested number of events. When the
1628 * underlying counter hits the threshold, an interrupt is generated, and this
1629 * method is called. This method marks the reservation as stopped, and passes
1630 * control off to the user-registered callback method, along with the
1631 * reservation (so that the user can, for example, write a 0 to the counter, and
1632 * restart the reservation).
1633 * This method assumes the reservation has a valid pmc_config_t within.
1634 *
1635 * @param target The pmc_reservation_t that caused the interrupt.
1636 * @param refCon User specified reference constant.
1637 */
1638static void pmc_reservation_interrupt(void *target, void *refCon) {
1639 pmc_reservation_t reservation = (pmc_reservation_t)target;
1640 pmc_state_t newState;
1641 uint64_t timeout;
1642 uint32_t spins;
1643
1644 assert(reservation);
1645
1646 /* Move the state machine */
1647 if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) {
1648 return;
1649 }
1650
1651 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
1652 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
1653 * on every cpu that can access the PMC.
1654 */
1655 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
1656
1657 /* Spin waiting for the state to turn to INTERRUPT */
1658 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
1659 timeout += mach_absolute_time();
1660 spins = 0;
1661 while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) {
1662 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
1663 if (++spins > PMC_SPIN_THRESHOLD) {
1664 if (mach_absolute_time() > timeout) {
1665 pmc_spin_timeout_count++;
1666 assert(0);
1667 }
1668 }
1669
1670 cpu_pause();
1671 }
1672
1673 assert(reservation->config);
1674 assert(reservation->config->method);
1675
1676 /* Call the registered callback handler */
1677#ifdef DEBUG_COUNTERS
1678 uint64_t start = mach_absolute_time();
1679#endif /* DEBUG_COUNTERS */
1680
1681 (void)reservation->config->method(reservation, refCon);
1682
1683#ifdef DEBUG_COUNTERS
1684 uint64_t end = mach_absolute_time();
1685 if((end - start) > 5000ULL) {
1686 kprintf("%s - user method %p took %llu ns\n", __FUNCTION__,
1687 reservation->config->method, (end - start));
1688 }
1689#endif /* DEBUG_COUNTERS */
1690
1691 /* Move the state machine */
1692 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) {
1693 return;
1694 }
1695
1696 /* Do any post-move actions necessary */
1697 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) {
1698 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
1699 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1700 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1701 thread_wakeup((event_t)reservation);
1702 }
1703}
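
/*
 * Example (sketch): a client PMI handler of the kind described above. It is
 * handed the reservation that fired and the refCon supplied via
 * pmc_config_set_interrupt_threshold(); here it simply rewinds the counter
 * and restarts the reservation. The (void *target, void *refCon) shape is
 * inferred from the call site above; see <pmc/pmc.h> for the authoritative
 * pmc_interrupt_method_t prototype. Names beginning with example_ are
 * hypothetical.
 */
#if 0
static void example_pmi_handler(void *target, void *refCon)
{
	pmc_reservation_t resv = (pmc_reservation_t)target;

	(void)refCon;

	/* rewind the counter and let it run again */
	(void)pmc_reservation_write(resv, 0ULL);
	(void)pmc_reservation_start(resv);
}
#endif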
1704
1705/*
1706 * Apple-private KPI for Apple kexts (IOProfileFamily) only
1707 */
1708
1709#if 0
1710#pragma mark -
1711#pragma mark IOProfileFamily private KPI
1712#endif
1713
1714/*
1715 * perf_monitor_register registers a new Performance Monitor, and its associated
1716 * callback methods. The given perf_monitor_object_t is the first argument to
1717 * each callback when they are called.
1718 */
1719kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
1720 perf_monitor_methods_t *methods) {
1721
1722 COUNTER_DEBUG("registering perf monitor %p\n", monitor);
1723
1724 if(!monitor || !methods) {
1725 return KERN_INVALID_ARGUMENT;
1726 }
1727
1728 /* Protect against out-of-date driver kexts */
1729 if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) {
1730 return KERN_INVALID_ARGUMENT;
1731 }
1732
1733 /* All methods are required */
1734 if(!methods->supports_context_switching || !methods->enable_counters ||
1735 !methods->disable_counters) {
1736 return KERN_INVALID_ARGUMENT;
1737 }
1738
1739 /* prevent dupes. */
1740 perf_monitor_t dupe = perf_monitor_find(monitor);
1741 if(dupe) {
1742 COUNTER_DEBUG("Duplicate registration for %p\n", monitor);
1743 perf_monitor_deallocate(dupe);
1744 return KERN_FAILURE;
1745 }
1746
1747 perf_monitor_t pm = perf_monitor_alloc();
1748 if(!pm) {
1749 return KERN_RESOURCE_SHORTAGE;
1750 }
1751
1752 /* initialize the object */
1753 perf_monitor_init(pm);
1754
1755 /* copy in the registration info */
1756 pm->object = monitor;
1757 memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t));
1758
1759 /* place it in the tracking queue */
1760 perf_monitor_enqueue(pm);
1761
1762 /* debug it */
1763 PRINT_PERF_MON(pm);
1764
1765 return KERN_SUCCESS;
1766}
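
/*
 * Example (sketch): how a Perf Monitor driver kext might register itself.
 * The callback shapes below are inferred from the call sites in this file
 * (enable_counters takes a pmc_object_t array and a count, and
 * supports_context_switching returns a boolean); see <pmc/pmc.h> for the
 * authoritative perf_monitor_methods_t definition. Names beginning with
 * example_ are hypothetical.
 */
#if 0
static boolean_t example_supports_context_switching(perf_monitor_object_t obj)
{
	(void)obj;
	return TRUE;
}

static kern_return_t example_enable_counters(perf_monitor_object_t obj,
	pmc_object_t *pmcs, uint32_t pmcCount)
{
	/* program the hardware to start counting on the given pmcs */
	(void)obj; (void)pmcs; (void)pmcCount;
	return KERN_SUCCESS;
}

static kern_return_t example_disable_counters(perf_monitor_object_t obj,
	pmc_object_t *pmcs, uint32_t pmcCount)
{
	/* stop the given pmcs */
	(void)obj; (void)pmcs; (void)pmcCount;
	return KERN_SUCCESS;
}

static kern_return_t example_register_monitor(perf_monitor_object_t monitor)
{
	perf_monitor_methods_t methods;

	memset(&methods, 0, sizeof(methods));
	methods.perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION;
	methods.supports_context_switching = example_supports_context_switching;
	methods.enable_counters = example_enable_counters;
	methods.disable_counters = example_disable_counters;

	return perf_monitor_register(monitor, &methods);
}
#endif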
1767
1768/*
1769 * perf_monitor_unregister unregisters a previously registered Perf Monitor,
1770 * looking it up by reference pointer (the same that was used in
1771 * perf_monitor_register()).
1772 */
1773kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) {
1774 kern_return_t ret = KERN_FAILURE;
1775
1776 COUNTER_DEBUG("unregistering perf monitor %p\n", monitor);
1777
1778 if(!monitor) {
1779 return KERN_INVALID_ARGUMENT;
1780 }
1781
1782 perf_monitor_t pm = perf_monitor_find(monitor);
1783 if(pm) {
1784 /* Remove it from the queue. */
1785 perf_monitor_dequeue(pm);
1786
1787 /* drop extra retain from find */
1788 perf_monitor_deallocate(pm);
1789
1790 /* and release the object */
1791 perf_monitor_deallocate(pm);
1792
1793 ret = KERN_SUCCESS;
1794 } else {
1795 COUNTER_DEBUG("could not find a registered pm that matches!\n");
1796 }
1797
1798 return ret;
1799}
1800
1801/*
1802 * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is
1803 * associated with a Perf Monitor. Perf Monitors are looked up by the reference
1804 * pointer that was used to previously register them.
1805 *
1806 * PMCs are registered with a reference pointer (@pmc_object), and a set of
1807 * callback methods. When the given callback methods are called from xnu, the
1808 * first argument will always be the reference pointer used to register the PMC.
1809 *
1810 * NOTE: @monitor must have been successfully registered via
1811 * perf_monitor_register before this method will succeed.
1812 */
1813kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object,
1814 pmc_methods_t *methods, void *object) {
1815
1816 COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
1817
1818 if(!monitor || !pmc_object || !methods || !object) {
1819 return KERN_INVALID_ARGUMENT;
1820 }
1821
1822 /* Prevent version mismatches */
1823 if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) {
1824 COUNTER_DEBUG("version mismatch\n");
1825 return KERN_INVALID_ARGUMENT;
1826 }
1827
1828 /* All methods are required. */
1829 if(!methods->create_config ||
1830 !methods->free_config ||
1831 !methods->config_set_value ||
1832 !methods->config_set_threshold ||
1833 !methods->config_set_handler ||
1834 !methods->set_config ||
1835 !methods->get_monitor ||
1836 !methods->get_name ||
1837 !methods->accessible_from_core ||
1838 !methods->accessible_cores ||
1839 !methods->get_count ||
1840 !methods->set_count ||
1841 !methods->disable ||
1842 !methods->enable ||
1843 !methods->open ||
1844 !methods->close) {
1845 return KERN_INVALID_ARGUMENT;
1846 }
1847
1848 /* make sure this perf monitor object is already registered */
1849 /*
1850 * NOTE: this adds a reference to the parent, so we'll have to drop it in
1851 * any failure code paths from here on out.
1852 */
1853 perf_monitor_t pm = perf_monitor_find(monitor);
1854 if(!pm) {
1855 COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor);
1856 return KERN_INVALID_ARGUMENT;
1857 }
1858
1859 /* make a new pmc */
1860 pmc_t pmc = pmc_alloc();
1861 if(!pmc) {
1862 /* drop the extra reference from perf_monitor_find() */
1863 perf_monitor_deallocate(pm);
1864 return KERN_RESOURCE_SHORTAGE;
1865 }
1866
1867 /* init it */
1868 pmc_init(pmc);
1869
1870 pmc->object = pmc_object;
1871 pmc->open_object = object;
1872
1873 /* copy the callbacks in */
1874 memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t));
1875
1876 pmc->monitor = pm;
1877
1878 perf_monitor_add_pmc(pmc->monitor, pmc);
1879
1880 /* enqueue it in our tracking queue */
1881 pmc_enqueue(pmc);
1882
1883 /* drop extra reference from perf_monitor_find() */
1884 perf_monitor_deallocate(pm);
1885
1886 return KERN_SUCCESS;
1887}
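
/*
 * Example (sketch): registering a single PMC against a monitor that was
 * registered above. The pmc_methods_t table (all sixteen callbacks checked
 * above) is assumed to have been filled in elsewhere by the driver; names
 * beginning with example_ are hypothetical.
 */
#if 0
extern pmc_methods_t example_pmc_methods;	/* populated by the driver */

static kern_return_t example_register_pmc(perf_monitor_object_t monitor,
	pmc_object_t counter, void *open_object)
{
	return pmc_register(monitor, counter, &example_pmc_methods, open_object);
}
#endif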
1888
1889/*
1890 * pmc_unregister unregisters a previously registered PMC, looking it up by
1891 * reference pointer: both the Perf Monitor it was registered with, and the
1892 * PMC's own reference pointer.
1893 */
1894kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) {
1895 COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
1896
1897 if(!monitor || !pmc_object) {
1898 return KERN_INVALID_ARGUMENT;
1899 }
1900
1901 pmc_t pmc = pmc_find(pmc_object);
1902 if(!pmc) {
1903 COUNTER_DEBUG("Could not find a matching pmc.\n");
1904 return KERN_FAILURE;
1905 }
1906
1907 /* remove it from the global queue */
1908 pmc_dequeue(pmc);
1909
1910 perf_monitor_remove_pmc(pmc->monitor, pmc);
1911
1912 /* remove extra reference count from pmc_find() */
1913 pmc_deallocate(pmc);
1914
1915 /* dealloc the pmc */
1916 pmc_deallocate(pmc);
1917
1918 return KERN_SUCCESS;
1919}
1920
1921#if 0
1922#pragma mark -
1923#pragma mark KPI
1924#endif
1925
1926/*
1927 * Begin in-kernel and in-kext KPI methods
1928 */
1929
1930/*
1931 * pmc_create_config creates a new configuration area from a given @pmc.
1932 *
1933 * NOTE: This method is not interrupt safe.
1934 */
1935kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) {
1936 pmc_config_t tmp = NULL;
1937
1938 if(!pmc || !config) {
1939 return KERN_INVALID_ARGUMENT;
1940 }
1941
1942 pmc_reference(pmc);
1943
1944 tmp = pmc_config_alloc(pmc);
1945 if(tmp) {
1946 tmp->object = pmc->methods.create_config(pmc->object);
1947
1948 if(!tmp->object) {
1949 pmc_config_free(pmc, tmp);
1950 tmp = NULL;
1951 } else {
1952 tmp->interrupt_after_value = 0ULL;
1953 tmp->method = NULL;
1954 tmp->refCon = NULL;
1955 }
1956 }
1957
1958 pmc_deallocate(pmc);
1959
1960 if(!tmp) {
1961 return KERN_RESOURCE_SHORTAGE;
1962 }
1963
1964 *config = tmp;
1965
1966 return KERN_SUCCESS;
1967}
1968
1969/*
1970 * pmc_free_config frees a configuration area created from a given @pmc
1971 *
1972 * NOTE: This method is not interrupt safe.
1973 */
1974void pmc_free_config(pmc_t pmc, pmc_config_t config) {
1975 assert(pmc);
1976 assert(config);
1977
1978 pmc_reference(pmc);
1979
1980 pmc_config_free(pmc, config);
1981
1982 pmc_deallocate(pmc);
1983}
1984
1985/*
1986 * pmc_config_set_value sets up configuration area key-value pairs. These pairs
1987 * are to be either pre-known, or looked up via CoreProfile.framework.
1988 *
1989 * NOTE: This method is not interrupt safe.
1990 */
1991kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config,
1992 uint8_t id, uint64_t value) {
1993
1994 kern_return_t ret = KERN_INVALID_ARGUMENT;
1995
1996 if(!pmc || !config) {
1997 return ret;
1998 }
1999
2000 pmc_reference(pmc);
2001
2002 ret = pmc->methods.config_set_value(config->object, id, value);
2003
2004 pmc_deallocate(pmc);
2005
2006 return ret;
2007}
2008
2009/*
2010 * pmc_config_set_interrupt_threshold modifies a config object, instructing
2011 * the pmc that it should generate a call to the given pmc_interrupt_method_t
2012 * after the counter counts @threshold events.
2013 *
2014 * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt
2015 * as the first argument when the interrupt handler is invoked, and the given
2016 * @refCon (which may be NULL) as the second.
2017 *
2018 * See pmc_interrupt_method_t.
2019 *
2020 * NOTE: This method is not interrupt safe.
2021 */
2022kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config,
2023 uint64_t threshold, pmc_interrupt_method_t method, void *refCon) {
2024 kern_return_t ret = KERN_INVALID_ARGUMENT;
2025
2026 if(!config || !pmc) {
2027 return ret;
2028 }
2029
2030 assert(config);
2031 assert(pmc);
2032
2033 pmc_reference(pmc);
2034
2035 do {
2036 /*
2037 * We have a minor annoyance to side-step here. The driver layer expects
2038 * the config to never change once a reservation has been taken out with
2039 * it. However, in order to have the PMI method have the reservation as
2040 * the first argument (in order to allow the user-method to, for
2041 * example, write a 0 to it, and restart it), we need to create the
2042 * pmc_reservation_t before setting it up in the config object.
2043 * We overcome this by caching the method in the pmc_config_t stand-in,
2044 * and mutating the pmc_config_object_t just before returning a
2045 * reservation (in pmc_reserve() and friends, below).
2046 */
2047
2048 /* might as well stash this away too. */
2049 config->interrupt_after_value = threshold;
2050 config->method = method;
2051 config->refCon = refCon;
2052
2053 ret = KERN_SUCCESS;
2054
2055 }while(0);
2056
2057 pmc_deallocate(pmc);
2058
2059 return ret;
2060}
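
/*
 * Example (sketch): building a configuration for a pmc. The id/value pair
 * passed to pmc_config_set_value() is driver-defined (or looked up via
 * CoreProfile.framework, as noted above); the 0x01/0xAB values are purely
 * illustrative, and example_pmi_handler is the hypothetical handler sketched
 * earlier.
 */
#if 0
static kern_return_t example_build_config(pmc_t pmc, pmc_config_t *outConfig)
{
	pmc_config_t config = NULL;
	kern_return_t kr;

	kr = pmc_create_config(pmc, &config);
	if (KERN_SUCCESS != kr) {
		return kr;
	}

	/* driver-defined event selection */
	kr = pmc_config_set_value(pmc, config, 0x01, 0xAB);
	if (KERN_SUCCESS != kr) {
		pmc_free_config(pmc, config);
		return kr;
	}

	/* request a PMI after 100000 events */
	kr = pmc_config_set_interrupt_threshold(pmc, config, 100000ULL,
		example_pmi_handler, NULL);
	if (KERN_SUCCESS != kr) {
		pmc_free_config(pmc, config);
		return kr;
	}

	*outConfig = config;
	return KERN_SUCCESS;
}
#endif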
2061
2062/*
2063 * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number
2064 * of pmc_t's returned. Callers should free this list with a call to
2065 * pmc_free_pmc_list().
2066 *
2067 * NOTE: This method is not interrupt safe.
2068 */
2069kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
2070 pmc_t *array = NULL;
2071 pmc_t pmc = NULL;
2072 size_t count = 0UL;
2073
2074 do {
2075 /* Copy down (to the stack) the count of perf counters */
2076 vm_size_t size = perf_counters_count;
2077
2078 /* Allocate that sized chunk */
2079 array = (pmc_t *)kalloc(sizeof(pmc_t) * size);
2080 if(!array) {
2081 return KERN_RESOURCE_SHORTAGE;
2082 }
2083
2084 /* Take the spin lock */
2085 lck_spin_lock(&perf_counters_queue_spin);
2086
2087 /* verify the size didn't change while we were allocating */
2088 if(size != perf_counters_count) {
2089 /*
2090 * queue size has changed between alloc and now - go back and
2091 * make another pass.
2092 */
2093
2094 /* drop the lock */
2095 lck_spin_unlock(&perf_counters_queue_spin);
2096
2097 /* free the block */
2098 kfree(array, sizeof(pmc_t) * size);
2099 array = NULL;
2100 }
2101
2102 /* if we get here, and array is NULL, we try again. */
2103 }while(!array);
2104
2105 /* copy the bits out */
2106 queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
2107 if(pmc) {
2108 /* copy out the pointer */
2109 array[count++] = pmc;
2110 }
2111 }
2112
2113 lck_spin_unlock(&perf_counters_queue_spin);
2114
2115 /* return the list and the size */
2116 *pmcs = array;
2117 *pmcCount = count;
2118
2119 return KERN_SUCCESS;
2120}
2121
2122/*
2123 * pmc_free_pmc_list frees an array of pmc_t that has been returned from
2124 * pmc_get_pmc_list.
2125 *
2126 * NOTE: This method is not interrupt safe.
2127 */
2128void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) {
2129 if(pmcs && pmcCount) {
2130 COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount);
2131
2132 kfree(pmcs, pmcCount * sizeof(pmc_t));
2133 }
2134}
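
/*
 * Example (sketch): walking the registered pmcs. pmc_get_pmc_list() hands
 * back an allocated snapshot of the queue, which must be released with
 * pmc_free_pmc_list().
 */
#if 0
static void example_walk_pmcs(void)
{
	pmc_t *pmcs = NULL;
	size_t count = 0UL, ii;

	if (KERN_SUCCESS != pmc_get_pmc_list(&pmcs, &count)) {
		return;
	}

	for (ii = 0UL; ii < count; ii++) {
		kprintf("pmc[%lu]: %s\n", (unsigned long)ii, pmc_get_name(pmcs[ii]));
	}

	pmc_free_pmc_list(pmcs, count);
}
#endif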
2135
2136kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) {
2137 kern_return_t ret = KERN_INVALID_ARGUMENT;
2138
2139 if(!name || !pmcs || !pmcCount) {
2140 return ret;
2141 }
2142
2143 pmc_t *list = NULL;
2144 size_t count = 0UL;
2145
2146 if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) {
2147 size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL;
2148 size_t len = strlen(name);
2149
2150 for(ii = 0UL; ii < count; ii++) {
2151 const char *pmcName = pmc_get_name(list[ii]);
2152
2153 if(strlen(pmcName) < len) {
2154 /*
2155 * If the pmc name is shorter than the requested match, it's no
2156 * match, as we're looking for the most specific match(es).
2157 */
2158 continue;
2159 }
2160
2161 if(0 == strncmp(name, pmcName, len)) {
2162 pmc_t temp = list[ii];
2163
2164 // move matches to the head of the array.
2165 list[ii] = list[swapPtr];
2166 list[swapPtr] = temp;
2167 swapPtr++;
2168
2169 // keep a count of the matches
2170 matchCount++;
2171 }
2172 }
2173
2174 if(matchCount) {
2175 /*
2176 * If we have matches, they are all at the head of the array, so
2177 * just allocate enough space for @matchCount pmc_t's, and copy the
2178 * head of the array to the new allocation. Then free the old
2179 * allocation.
2180 */
2181
2182 pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount);
2183 if(result) {
2184 // copy the matches
2185 memcpy(result, list, sizeof(pmc_t) * matchCount);
2186
2187 ret = KERN_SUCCESS;
2188 }
2189
2190 pmc_free_pmc_list(list, count);
2191
2192 if(!result) {
2193 *pmcs = NULL;
2194 *pmcCount = 0UL;
2195 return KERN_RESOURCE_SHORTAGE;
2196 }
2197
2198 *pmcs = result;
2199 *pmcCount = matchCount;
2200 } else {
2201 *pmcs = NULL;
2202 *pmcCount = 0UL;
2203 }
2204 }
2205
2206 return ret;
2207}
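
/*
 * Example (sketch): prefix lookup by name. The matches array is allocated by
 * pmc_find_by_name() and is released with pmc_free_pmc_list(); the names
 * themselves are whatever each driver's get_name callback reports.
 */
#if 0
static void example_log_matches(const char *prefix)
{
	pmc_t *matches = NULL;
	size_t count = 0UL, ii;

	if (KERN_SUCCESS != pmc_find_by_name(prefix, &matches, &count)) {
		return;
	}

	for (ii = 0UL; ii < count; ii++) {
		kprintf("match[%lu]: %s\n", (unsigned long)ii, pmc_get_name(matches[ii]));
	}

	pmc_free_pmc_list(matches, count);
}
#endif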
2208
2209/*
2210 * pmc_get_name returns a pointer (not copied) to the human-readable name of the
2211 * given pmc.
2212 *
2213 * NOTE: Driver authors must take care to not allocate during this method, as
2214 * this method *IS* interrupt safe.
2215 */
2216const char *pmc_get_name(pmc_t pmc) {
2217 assert(pmc);
2218
2219 const char *name = pmc->methods.get_name(pmc->object);
2220
2221 return name;
2222}
2223
2224/*
2225 * pmc_get_accessible_core_list returns a pointer to an array of logical core
2226 * numbers (as well as the size of that array) that represent the logical cores
2227 * (hardware threads) from which the given @pmc can be accessed directly.
2228 *
2229 * NOTE: This method is interrupt safe.
2230 */
2231kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores,
2232 size_t *logicalCoreCt) {
2233
2234 kern_return_t ret = KERN_INVALID_ARGUMENT;
2235
2236 if(!pmc || !logicalCores || !logicalCoreCt) {
2237 return ret;
2238 }
2239
2240 ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt);
2241
2242 return ret;
2243}
2244
2245/*
2246 * pmc_accessible_from_core will return TRUE if the given @pmc is directly
2247 * (e.g., hardware) readable from the given logical core.
2248 *
2249 * NOTE: This method is interrupt safe.
2250 */
2251boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
2252 boolean_t ret = FALSE;
2253
2254 assert(pmc);
2255
2256 ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);
2257
2258 return ret;
2259}
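
/*
 * Example (sketch): checking where a pmc can be read. The core array comes
 * straight from the driver's accessible_cores callback and is not copied by
 * this layer, so it is only inspected here.
 */
#if 0
static boolean_t example_usable_on_this_core(pmc_t pmc)
{
	uint32_t *cores = NULL;
	size_t coreCt = 0UL;

	if (KERN_SUCCESS == pmc_get_accessible_core_list(pmc, &cores, &coreCt)) {
		kprintf("%s is accessible from %lu core(s)\n",
			pmc_get_name(pmc), (unsigned long)coreCt);
	}

	return pmc_accessible_from_core(pmc, (uint32_t)cpu_number());
}
#endif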
2260
2261static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) {
2262 assert(resv);
2263 assert(resv->pmc);
2264 assert(config);
2265 assert(config->object);
2266
2267 /* If there is no PMI to set up, skip the driver calls and return success. */
2268 if(config->interrupt_after_value && config->method) {
2269
2270 /* set the threshold */
2271 kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object,
2272 config->interrupt_after_value);
2273
2274 if(KERN_SUCCESS != ret) {
2275 /*
2276 * This is the most useful error message here, as this only happens
2277 * as a result of pmc_reserve*()
2278 */
2279 COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc);
2280 return FALSE;
2281 }
2282
2283 if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object,
2284 (void *)resv, &pmc_reservation_interrupt, config->refCon)) {
2285
2286 COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc);
2287 return FALSE;
2288 }
2289 }
2290
2291 return TRUE;
2292}
2293
2294/*
2295 * pmc_reserve will attempt to reserve the given @pmc, with a given
2296 * configuration object, for counting system-wide. This method will fail with
2297 * KERN_FAILURE if the given pmc is already reserved at any scope.
2298 *
2299 * This method consumes the given configuration object if it returns
2300 * KERN_SUCCESS. Any other return value indicates the caller
2301 * must free the config object via pmc_free_config().
2302 *
2303 * NOTE: This method is NOT interrupt safe.
2304 */
2305kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config,
2306 pmc_reservation_t *reservation) {
2307
2308 if(!pmc || !config || !reservation) {
2309 return KERN_INVALID_ARGUMENT;
2310 }
2311
2312 pmc_reservation_t resv = reservation_alloc();
2313 if(!resv) {
2314 return KERN_RESOURCE_SHORTAGE;
2315 }
2316
2317 reservation_init(resv);
2318
2319 resv->flags |= PMC_FLAG_SCOPE_SYSTEM;
2320 resv->config = config;
2321
2322 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2323 resv->config = NULL;
2324 return KERN_FAILURE;
2325 }
2326
2327 /* enqueue reservation in proper place */
2328 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2329 /* Prevent free of config object */
2330 resv->config = NULL;
2331
2332 reservation_free(resv);
2333 return KERN_FAILURE;
2334 }
2335
2336 /* The PMI method (if any) was set up above by pmc_reservation_setup_pmi(). */
2337
2338 *reservation = resv;
2339
2340 return KERN_SUCCESS;
2341}
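
/*
 * Example (sketch): taking a system-wide reservation with a config built as
 * in example_build_config() above, then starting it. The config is consumed
 * only on KERN_SUCCESS; on any other return the caller still owns it and
 * must free it.
 */
#if 0
static kern_return_t example_reserve_system(pmc_t pmc, pmc_config_t config,
	pmc_reservation_t *outResv)
{
	kern_return_t kr = pmc_reserve(pmc, config, outResv);

	if (KERN_SUCCESS != kr) {
		/* reservation failed; we still own the config */
		pmc_free_config(pmc, config);
		return kr;
	}

	return pmc_reservation_start(*outResv);
}
#endif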
2342
2343/*
2344 * pmc_reserve_task will attempt to reserve the given @pmc with a given
2345 * configuration object, for counting when the given @task is running on any
2346 * logical core that can directly access the given @pmc. This method will fail
2347 * with KERN_FAILURE if the given pmc is already reserved at either system or
2348 * thread scope.
2349 *
2350 * This method consumes the given configuration object if it returns
2351 * KERN_SUCCESS. Any other return value indicates the caller
2352 * must free the config object via pmc_free_config().
2353 *
2354 * NOTE: You can reserve the same pmc for N different tasks concurrently.
2355 * NOTE: This method is NOT interrupt safe.
2356 */
2357kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config,
2358 task_t task, pmc_reservation_t *reservation) {
2359
2360 if(!pmc || !config || !reservation || !task) {
2361 return KERN_INVALID_ARGUMENT;
2362 }
2363
2364 if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
2365 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
2366 return KERN_INVALID_ARGUMENT;
2367 }
2368
2369 pmc_reservation_t resv = reservation_alloc();
2370 if(!resv) {
2371 return KERN_RESOURCE_SHORTAGE;
2372 }
2373
2374 reservation_init(resv);
2375
2376 resv->flags |= PMC_FLAG_SCOPE_TASK;
2377 resv->task = task;
2378
2379 resv->config = config;
2380
2381 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2382 resv->config = NULL;
2383 return KERN_FAILURE;
2384 }
2385
2386 /* enqueue reservation in proper place */
2387 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2388 /* Prevent free of config object */
2389 resv->config = NULL;
2390
2391 reservation_free(resv);
2392 return KERN_FAILURE;
2393 }
2394
2395 *reservation = resv;
2396
2397 return KERN_SUCCESS;
2398}
2399
2400/*
2401 * pmc_reserve_thread will attempt to reserve the given @pmc with a given
2402 * configuration object, for counting when the given @thread is running on any
2403 * logical core that can directly access the given @pmc. This method will fail
2404 * with KERN_FAILURE if the given pmc is already reserved at either system or
2405 * task scope.
2406 *
2407 * This method consumes the given configuration object if it returns
2408 * KERN_SUCCESS. Any other return value indicates the caller
2409 * must free the config object via pmc_free_config().
2410 *
2411 * NOTE: You can reserve the same pmc for N different threads concurrently.
2412 * NOTE: This method is NOT interrupt safe.
2413 */
2414kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config,
2415 thread_t thread, pmc_reservation_t *reservation) {
2416 if(!pmc || !config || !reservation || !thread) {
2417 return KERN_INVALID_ARGUMENT;
2418 }
2419
2420 if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
2421 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
2422 return KERN_INVALID_ARGUMENT;
2423 }
2424
2425 pmc_reservation_t resv = reservation_alloc();
2426 if(!resv) {
2427 return KERN_RESOURCE_SHORTAGE;
2428 }
2429
2430 reservation_init(resv);
2431
2432 resv->flags |= PMC_FLAG_SCOPE_THREAD;
2433 resv->thread = thread;
2434
2435 resv->config = config;
2436
2437 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2438 resv->config = NULL;
2439 return KERN_FAILURE;
2440 }
2441
2442 /* enqueue reservation in proper place */
2443 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2444 /* Prevent free of config object */
2445 resv->config = NULL;
2446
2447 reservation_free(resv);
2448 return KERN_FAILURE;
2449 }
2450
2451 *reservation = resv;
2452
2453 return KERN_SUCCESS;
2454}
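
/*
 * Example (sketch): a thread-scoped reservation for the calling thread. The
 * underlying Perf Monitor must support context switching, as checked above;
 * config ownership follows the same rule as pmc_reserve().
 */
#if 0
static kern_return_t example_reserve_current_thread(pmc_t pmc,
	pmc_config_t config, pmc_reservation_t *outResv)
{
	kern_return_t kr = pmc_reserve_thread(pmc, config, current_thread(), outResv);

	if (KERN_SUCCESS != kr) {
		/* config was not consumed */
		pmc_free_config(pmc, config);
	}

	return kr;
}
#endif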
2455
2456/*
2457 * pmc_reservation_start instructs the given reservation to start counting as
2458 * soon as possible.
2459 *
2460 * NOTE: This method is interrupt safe.
2461 */
2462kern_return_t pmc_reservation_start(pmc_reservation_t reservation) {
2463 pmc_state_t newState;
2464
2465 if(!reservation) {
2466 return KERN_INVALID_ARGUMENT;
2467 }
2468
2469 /* Move the state machine */
2470 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) {
2471 return KERN_FAILURE;
2472 }
2473
2474 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
2475 * broadcast right before it leaves
2476 */
2477 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) {
2478 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2479 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu
2480 * on every cpu that can access the PMC.
2481 */
2482 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
2483 }
2484
2485 return KERN_SUCCESS;
2486}
2487
2488/*
2489 * pmc_reservation_stop instructs the given reservation to stop counting as
2490 * soon as possible. When this method returns, the pmc will be marked as stopping
2491 * and subsequent calls to pmc_reservation_start will succeed. This does not mean
2492 * that the pmc hardware has _actually_ stopped running. Assuming no other changes
2493 * to the reservation state, the pmc hardware _will_ stop shortly.
2494 *
2495 */
2496kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) {
2497 pmc_state_t newState;
2498
2499 if(!reservation) {
2500 return KERN_INVALID_ARGUMENT;
2501 }
2502
2503 /* Move the state machine */
2504 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) {
2505 return KERN_FAILURE;
2506 }
2507
2508 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
2509 * broadcast right before it leaves. Similarly, if we just moved directly to STOP, don't bother broadcasting.
2510 */
2511 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) {
2512 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2513 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
2514 * on every cpu that can access the PMC.
2515 */
2516
2517 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
2518 }
2519
2520 return KERN_SUCCESS;
2521}
2522
2523/*
2524 * pmc_reservation_read will read the event count associated with a reservation.
2525 * If the caller is currently executing in a context that both a) matches the
2526 * reservation's context, and b) can access the reservation's pmc directly, the
2527 * value will be read from hardware. Otherwise, this returns the reservation's
2528 * stored value.
2529 *
2530 * NOTE: This method is interrupt safe.
2531 * NOTE: When not on the interrupt stack, this method may block.
2532 */
2533kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) {
2534 kern_return_t ret = KERN_FAILURE;
2535 uint64_t timeout;
2536 uint32_t spins;
2537
2538 if(!reservation || !value) {
2539 return KERN_INVALID_ARGUMENT;
2540 }
2541
2542 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
2543 timeout += mach_absolute_time();
2544 spins = 0;
2545 do {
2546 uint32_t state = reservation->state;
2547
2548 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
2549 /* Attempt read from hardware via drivers. */
2550
2551 assert(reservation->pmc);
2552
2553 ret = reservation->pmc->methods.get_count(reservation->pmc->object, value);
2554
2555 break;
2556 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
2557 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
2558 /* Spin */
2559 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
2560 if (++spins > PMC_SPIN_THRESHOLD) {
2561 if (mach_absolute_time() > timeout) {
2562 pmc_spin_timeout_count++;
2563 assert(0);
2564 }
2565 }
2566
2567 cpu_pause();
2568 } else {
2569 break;
2570 }
2571 } while (1);
2572
2573 /* If the direct hardware read failed (for whatever reason) */
2574 if(KERN_SUCCESS != ret) {
2575 /* Read stored value */
2576 *value = reservation->value;
2577 }
2578
2579 return KERN_SUCCESS;
2580}
2581
2582/*
2583 * pmc_reservation_write will write the event count associated with a reservation.
2584 * If the caller is currently executing in a context that both a) matches the
2585 * reservation's context, and b) can access the reservation's pmc directly, the
2586 * value will be written to hardware. Otherwise, this writes the reservation's
2587 * stored value.
2588 *
2589 * NOTE: This method is interrupt safe.
2590 * NOTE: When not on the interrupt stack, this method may block.
2591 */
2592kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) {
2593 kern_return_t ret = KERN_FAILURE;
2594 uint64_t timeout;
2595 uint32_t spins;
2596
2597 if(!reservation) {
2598 return KERN_INVALID_ARGUMENT;
2599 }
2600
2601 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
2602 timeout += mach_absolute_time();
2603 spins = 0;
2604 do {
2605 uint32_t state = reservation->state;
2606
2607 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
2608 /* Write to hardware via drivers. */
2609 assert(reservation->pmc);
2610
2611 ret = reservation->pmc->methods.set_count(reservation->pmc->object, value);
2612 break;
2613 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
2614 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
2615 /* Spin */
2616 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
2617 if (++spins > PMC_SPIN_THRESHOLD) {
2618 if (mach_absolute_time() > timeout) {
2619 pmc_spin_timeout_count++;
2620 assert(0);
2621 }
2622 }
2623
2624 cpu_pause();
2625 } else {
2626 break;
2627 }
2628 } while (1);
2629
2630 if(KERN_SUCCESS != ret) {
2631 /* Write stored value */
2632 reservation->value = value;
2633 }
2634
2635 return KERN_SUCCESS;
2636}
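
/*
 * Example (sketch): sampling and rewinding a reservation. Both calls fall
 * back to the reservation's stored value when the hardware cannot be touched
 * from the current context, as described above.
 */
#if 0
static uint64_t example_sample_and_reset(pmc_reservation_t resv)
{
	uint64_t count = 0ULL;

	(void)pmc_reservation_read(resv, &count);
	(void)pmc_reservation_write(resv, 0ULL);

	return count;
}
#endif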
2637
2638/*
2639 * pmc_reservation_free releases a reservation and all associated resources.
2640 *
2641 * NOTE: This method is NOT interrupt safe.
2642 */
2643kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
2644 pmc_state_t newState;
2645
2646 if(!reservation) {
2647 return KERN_INVALID_ARGUMENT;
2648 }
2649
2650 /* Move the state machine */
2651 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) {
2652 return KERN_FAILURE;
2653 }
2654
2655 /* If we didn't move directly to DEALLOC, help things along */
2656 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) {
2657 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2658 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
2659 * on every cpu that can access the PMC.
2660 */
2661 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
2662 }
2663
2664 /* Block until the reservation hits the <DEALLOC, 0, > state */
2665 while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) {
2666 assert_wait((event_t)reservation, THREAD_UNINT);
2667 thread_block(THREAD_CONTINUE_NULL);
2668 }
2669
2670 /* remove from queues */
2671 pmc_internal_reservation_remove(reservation);
2672
2673 /* free reservation */
2674 reservation_free(reservation);
2675
2676 return KERN_SUCCESS;
2677}
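
/*
 * Example (sketch): tearing down a reservation. pmc_reservation_free() blocks
 * until the reservation reaches <DEALLOC, 0, >, so it must not be called from
 * interrupt context; stopping first is optional but quiesces the hardware
 * sooner.
 */
#if 0
static void example_release(pmc_reservation_t resv)
{
	(void)pmc_reservation_stop(resv);
	(void)pmc_reservation_free(resv);
}
#endif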
2678
2679/*
2680 * pmc_context_switch performs all context switching necessary to save all pmc
2681 * state associated with @oldThread (and the task to which @oldThread belongs),
2682 * as well as to restore all pmc state associated with @newThread (and the task
2683 * to which @newThread belongs).
2684 *
2685 * NOTE: This method IS interrupt safe.
2686 */
2687boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) {
2688 pmc_reservation_t resv = NULL;
2689 uint32_t cpuNum = cpu_number();
2690
2691 /* Outgoing thread: save pmc state */
2692 lck_spin_lock(&reservations_spin);
2693
2694 /* iterate over any reservations */
2695 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
2696 if(resv && oldThread == resv->thread) {
2697
2698 /* check if we can read the associated pmc from this core. */
2699 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2700 /* save the state. At this point, if it fails, it fails. */
2701 (void)pmc_internal_reservation_context_out(resv);
2702 }
2703 }
2704 }
2705
2706 queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
2707 if(resv && resv->task == oldThread->task) {
2708 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2709 (void)pmc_internal_reservation_context_out(resv);
2710 }
2711 }
2712 }
2713
2714 /* Incoming thread: restore pmc state */
2715
2716 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
2717 if(resv && resv->thread == newThread) {
2718 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2719 (void)pmc_internal_reservation_context_in(resv);
2720 }
2721 }
2722 }
2723
2724
2725 queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
2726 if(resv && resv->task == newThread->task) {
2727 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2728 (void)pmc_internal_reservation_context_in(resv);
2729 }
2730 }
2731 }
2732
2733 lck_spin_unlock(&reservations_spin);
2734
2735 return TRUE;
2736}
2737
2738#else /* !CONFIG_COUNTERS */
2739
2740#if 0
2741#pragma mark -
2742#pragma mark Dummy functions
2743#endif
2744
2745/*
2746 * In configurations that do not include the PMC KPI, we still export these
2747 * symbols for kexts, so we need to define stub methods that simply return
2748 * failure.
2749 */
2750kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused,
2751 perf_monitor_methods_t *methods __unused) {
2752 return KERN_FAILURE;
2753}
2754
2755kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) {
2756 return KERN_FAILURE;
2757}
2758
2759kern_return_t pmc_register(perf_monitor_object_t monitor __unused,
2760 pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) {
2761 return KERN_FAILURE;
2762}
2763
2764kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused,
2765 pmc_object_t pmc __unused) {
2766 return KERN_FAILURE;
2767}
2768
2769kern_return_t pmc_create_config(pmc_t pmc __unused,
2770 pmc_config_t *config __unused) {
2771 return KERN_FAILURE;
2772}
2773
2774void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) {
2775}
2776
2777kern_return_t pmc_config_set_value(pmc_t pmc __unused,
2778 pmc_config_t config __unused, uint8_t id __unused,
2779 uint64_t value __unused) {
2780 return KERN_FAILURE;
2781}
2782
2783kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused,
2784 pmc_config_t config __unused, uint64_t threshold __unused,
2785 pmc_interrupt_method_t method __unused, void *refCon __unused) {
2786 return KERN_FAILURE;
2787}
2788
2789kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) {
2790 return KERN_FAILURE;
2791}
2792
2793void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) {
2794}
2795
2796kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused,
2797 size_t *pmcCount __unused) {
2798 return KERN_FAILURE;
2799}
2800
2801const char *pmc_get_name(pmc_t pmc __unused) {
2802 return "";
2803}
2804
2805kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused,
2806 uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) {
2807 return KERN_FAILURE;
2808}
2809
2810boolean_t pmc_accessible_from_core(pmc_t pmc __unused,
2811 uint32_t logicalCore __unused) {
2812 return FALSE;
2813}
2814
2815kern_return_t pmc_reserve(pmc_t pmc __unused,
2816 pmc_config_t config __unused, pmc_reservation_t *reservation __unused) {
2817 return KERN_FAILURE;
2818}
2819
2820kern_return_t pmc_reserve_task(pmc_t pmc __unused,
2821 pmc_config_t config __unused, task_t task __unused,
2822 pmc_reservation_t *reservation __unused) {
2823 return KERN_FAILURE;
2824}
2825
2826kern_return_t pmc_reserve_thread(pmc_t pmc __unused,
2827 pmc_config_t config __unused, thread_t thread __unused,
2828 pmc_reservation_t *reservation __unused) {
2829 return KERN_FAILURE;
2830}
2831
2832kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) {
2833 return KERN_FAILURE;
2834}
2835
2836kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) {
2837 return KERN_FAILURE;
2838}
2839
2840kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused,
2841 uint64_t *value __unused) {
2842 return KERN_FAILURE;
2843}
2844
2845kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused,
2846 uint64_t value __unused) {
2847 return KERN_FAILURE;
2848}
2849
2850kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) {
2851 return KERN_FAILURE;
2852}
2853
2854
2855#endif /* !CONFIG_COUNTERS */