1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <kern/kalloc.h>
25 #include <kern/kern_types.h>
26 #include <kern/locks.h>
27 #include <kern/misc_protos.h>
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/zalloc.h>
31 #include <machine/machine_cpu.h>
32
33 #include <pmc/pmc.h>
34
35 #include <libkern/OSAtomic.h>
36
37 #if defined(__i386__) || defined(__x86_64__)
38 #include <i386/mp.h>
39 #endif
40
41 #if CONFIG_COUNTERS
42
43 /* toggle for various debug logging */
44 #undef DEBUG_COUNTERS
45
46 typedef uint8_t pmc_state_event_t;
47
48 #define PMC_STATE_EVENT_START 0
49 #define PMC_STATE_EVENT_STOP 1
50 #define PMC_STATE_EVENT_FREE 2
51 #define PMC_STATE_EVENT_INTERRUPT 3
52 #define PMC_STATE_EVENT_END_OF_INTERRUPT 4
53 #define PMC_STATE_EVENT_CONTEXT_IN 5
54 #define PMC_STATE_EVENT_CONTEXT_OUT 6
55 #define PMC_STATE_EVENT_LOAD_FINISHED 7
56 #define PMC_STATE_EVENT_STORE_FINISHED 8
57
58 /* PMC spin timeouts */
59 #define PMC_SPIN_THRESHOLD 10 /* Number of spins to allow before checking mach_absolute_time() */
60 #define PMC_SPIN_TIMEOUT_US 10 /* Time in microseconds before the spin causes an assert */
61
62 uint64_t pmc_spin_timeout_count = 0; /* Number of times where a PMC spin loop causes a timeout */
63
64 #ifdef DEBUG_COUNTERS
65 # include <pexpert/pexpert.h>
66 # define COUNTER_DEBUG(...) \
67 do { \
68 kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
69 kprintf(__VA_ARGS__); \
70 } while(0)
71
72 # define PRINT_PERF_MON(x) \
73 do { \
74 kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
75 x, x->object, x->useCount, \
76 x->methods.supports_context_switching ? \
77 x->methods.supports_context_switching(x->object) : 0); \
78 } while(0)
79
80 static const char * pmc_state_state_name(pmc_state_t state) {
81 switch (PMC_STATE_STATE(state)) {
82 case PMC_STATE_STATE_INVALID:
83 return "INVALID";
84 case PMC_STATE_STATE_STOP:
85 return "STOP";
86 case PMC_STATE_STATE_CAN_RUN:
87 return "CAN_RUN";
88 case PMC_STATE_STATE_LOAD:
89 return "LOAD";
90 case PMC_STATE_STATE_RUN:
91 return "RUN";
92 case PMC_STATE_STATE_STORE:
93 return "STORE";
94 case PMC_STATE_STATE_INTERRUPT:
95 return "INTERRUPT";
96 case PMC_STATE_STATE_DEALLOC:
97 return "DEALLOC";
98 default:
99 return "UNKNOWN";
100 }
101 }
102
103 static const char * pmc_state_event_name(pmc_state_event_t event) {
104 switch (event) {
105 case PMC_STATE_EVENT_START:
106 return "START";
107 case PMC_STATE_EVENT_STOP:
108 return "STOP";
109 case PMC_STATE_EVENT_FREE:
110 return "FREE";
111 case PMC_STATE_EVENT_INTERRUPT:
112 return "INTERRUPT";
113 case PMC_STATE_EVENT_END_OF_INTERRUPT:
114 return "END OF INTERRUPT";
115 case PMC_STATE_EVENT_CONTEXT_IN:
116 return "CONTEXT IN";
117 case PMC_STATE_EVENT_CONTEXT_OUT:
118 return "CONTEXT OUT";
119 case PMC_STATE_EVENT_LOAD_FINISHED:
120 return "LOAD_FINISHED";
121 case PMC_STATE_EVENT_STORE_FINISHED:
122 return "STORE_FINISHED";
123 default:
124 return "UNKNOWN";
125 }
126 }
127
128 # define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
129 # define PMC_STATE_ARGS(x) pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
130 ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
131 #else
132 # define COUNTER_DEBUG(...)
133 # define PRINT_PERF_MON(x)
134 # define PMC_STATE_FORMAT
135 # define PMC_STATE_ARGS(x)
136 #endif
137
138 /*!struct
139 * pmc_config is the data behind a pmc_config_t.
140 * @member object A pointer to an instance of IOPerformanceCounterConfiguration
141 * @member method A pointer to a method to call to handle PMI.
142 * @member interrupt_after_value Cause a PMI after the counter counts this many
143 * events.
144 * @member refCon Passed to the @method method as the refCon argument.
145 */
146 struct pmc_config {
147 pmc_config_object_t object;
148 volatile pmc_interrupt_method_t method;
149 uint64_t interrupt_after_value;
150 void *refCon;
151 };
152
153 /*
154 * Allocation Zones
155 *
156 * Two allocation zones - Perf zone small and Perf zone big.
157 * Each zone has associated maximums, defined below.
158 * The small zone's unit size is the max of the smaller allocation objects (all sizes on
159 * K64):
160 * perf_monitor_t - 48 bytes
161 * perf_monitor_methods_t - 28 bytes
162 * pmc_reservation_t - 48 bytes
163 * pmc_config_t - 32 bytes
164 * perf_small_zone unit size is (on K64) 48 bytes
165 * perf_small_zone max count must be max number of perf monitors, plus (max
166 * number of reservations * 2). The "*2" is because each reservation has a
167 * pmc_config_t within.
168 *
169 * The big zone's unit size is the max of the larger allocation units:
170 * pmc_t - 144 bytes
171 * pmc_methods_t - 116 bytes
172 * perf_big_zone unit size is (on K64) 144 bytes
173 * perf_big_zone max count is the max number of PMCs we support.
174 */
175
176 static zone_t perf_small_zone = NULL;
177 #define MAX_PERF_SMALLS (256 + 8196 + 8196)
178 #define PERF_SMALL_UNIT_SZ (MAX(MAX(sizeof(struct perf_monitor), \
179 sizeof(struct pmc_reservation)), sizeof(struct pmc_config)))
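/*
 * Worked example of the sizing above (the breakdown is an assumption, not
 * spelled out in the original comment): with 256 perf monitors and 8196
 * reservations, each reservation carrying one pmc_config_t, the small zone
 * needs 256 + 8196 + 8196 = 16648 elements, which is the MAX_PERF_SMALLS
 * value used here.
 */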
180
181 static zone_t perf_big_zone = NULL;
182 #define MAX_PERF_BIGS (1024)
183 #define PERF_BIG_UNIT_SZ (sizeof(struct pmc))
184
185 /*
186 * Locks and Lock groups
187 */
188 static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL;
189 static lck_grp_attr_t *pmc_lock_grp_attr;
190 static lck_attr_t *pmc_lock_attr;
191
192 /* PMC tracking queue locks */
193 static lck_spin_t perf_monitor_queue_spin; /* protects adding and removing from queue */
194 static lck_spin_t perf_counters_queue_spin; /* protects adding and removing from queue */
195
196 /* Reservation tracking queues lock */
197 static lck_spin_t reservations_spin;
198
199 /*
200 * Tracking queues
201 *
202 * Keeps track of registered perf monitors and perf counters
203 */
204 static queue_t perf_monitors_queue = NULL;
205 static volatile uint32_t perf_monitors_count = 0U;
206
207 static queue_t perf_counters_queue = NULL;
208 static volatile uint32_t perf_counters_count = 0U;
209
210 /*
211 * Reservation queues
212 *
213 * Keeps track of all system, task, and thread-level reservations (both active and
214 * inactive).
215 *
216 * We track them all here (rather than in their respective task or thread only)
217 * so that we can inspect our tracking data directly (rather than peeking at
218 * every task and thread) to determine if/when a new reservation would
219 * constitute a conflict.
220 */
221 static queue_t system_reservations = NULL;
222 static volatile uint32_t system_reservation_count = 0U;
223
224 static queue_t task_reservations = NULL;
225 static volatile uint32_t task_reservation_count = 0U;
226
227 static queue_t thread_reservations = NULL;
228 static volatile uint32_t thread_reservation_count = 0U;
229
230
231 #if XNU_KERNEL_PRIVATE
232
233 /*
234 * init_pmc_locks creates and initializes all the locks and lock groups and lock
235 * attributes required for the pmc sub-system.
236 */
237 static void init_pmc_locks(void) {
238 pmc_lock_attr = lck_attr_alloc_init();
239 assert(pmc_lock_attr);
240
241 pmc_lock_grp_attr = lck_grp_attr_alloc_init();
242 assert(pmc_lock_grp_attr);
243
244 pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
245 assert(pmc_lock_grp);
246
247 lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
248 lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);
249
250 lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);
251 }
252
253 /*
254 * init_pmc_zones initializes the allocation zones used by the pmc subsystem
255 */
256 static void init_pmc_zones(void) {
257 perf_small_zone = zinit(PERF_SMALL_UNIT_SZ,
258 MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS,
259 "pmc.small zone");
260
261 assert(perf_small_zone);
262
263 perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
264 MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS,
265 "pmc.big zone");
266
267 assert(perf_big_zone);
268 }
269
270 /*
271 * init_pmc_queues allocates and initializes the tracking queues for
272 * registering and reserving individual pmcs and perf monitors.
273 */
274 static void init_pmc_queues(void) {
275 perf_monitors_queue = (queue_t)kalloc(sizeof(struct queue_entry));
276 assert(perf_monitors_queue);
277
278 queue_init(perf_monitors_queue);
279
280 perf_counters_queue = (queue_t)kalloc(sizeof(struct queue_entry));
281 assert(perf_counters_queue);
282
283 queue_init(perf_counters_queue);
284
285 system_reservations = (queue_t)kalloc(sizeof(struct queue_entry));
286 assert(system_reservations);
287
288 queue_init(system_reservations);
289
290 task_reservations = (queue_t)kalloc(sizeof(struct queue_entry));
291 assert(task_reservations);
292
293 queue_init(task_reservations);
294
295 thread_reservations = (queue_t)kalloc(sizeof(struct queue_entry));
296 assert(thread_reservations);
297
298 queue_init(thread_reservations);
299 }
300
301 /*
302 * pmc_bootstrap brings up all the necessary infrastructure required to use the
303 * pmc sub-system.
304 */
305 __private_extern__
306 void pmc_bootstrap(void) {
307 /* build our alloc zones */
308 init_pmc_zones();
309
310 /* build the locks */
311 init_pmc_locks();
312
313 /* build our tracking queues */
314 init_pmc_queues();
315 }
316
317 #endif /* XNU_KERNEL_PRIVATE */
318
319 /*
320 * Perf Monitor Internals
321 */
322
323 static perf_monitor_t perf_monitor_alloc(void) {
324 /* perf monitors come from the perf small zone */
325 return (perf_monitor_t)zalloc(perf_small_zone);
326 }
327
328 static void perf_monitor_free(void *pm) {
329 zfree(perf_small_zone, pm);
330 }
331
332 static void perf_monitor_init(perf_monitor_t pm) {
333 assert(pm);
334
335 pm->object = NULL;
336
337 bzero(&(pm->methods), sizeof(perf_monitor_methods_t));
338
339 pm->useCount = 1; /* initial retain count of 1, for caller */
340
341 pm->link.next = pm->link.prev = (queue_entry_t)NULL;
342 }
343
344 /*
345 * perf_monitor_dequeue removes the given perf_monitor_t from the
346 * perf_monitor_queue, thereby unregistering it with the system.
347 */
348 static void perf_monitor_dequeue(perf_monitor_t pm) {
349 lck_spin_lock(&perf_monitor_queue_spin);
350
351 /*
352 * remove the @pm object from the @perf_monitor_queue queue (it is of type
353 * <perf_monitor_t> and has a field called @link that is the queue_link_t).
354 */
355 queue_remove(perf_monitors_queue, pm, perf_monitor_t, link);
356
357 perf_monitors_count--;
358
359 lck_spin_unlock(&perf_monitor_queue_spin);
360 }
361
362 /*
363 * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue,
364 * thereby registering it for use with the system.
365 */
366 static void perf_monitor_enqueue(perf_monitor_t pm) {
367 lck_spin_lock(&perf_monitor_queue_spin);
368
369 queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
370
371 perf_monitors_count++;
372
373 lck_spin_unlock(&perf_monitor_queue_spin);
374 }
375
376 /*
377 * perf_monitor_reference increments the reference count for the given
378 * perf_monitor_t.
379 */
380 static void perf_monitor_reference(perf_monitor_t pm) {
381 assert(pm);
382
383 OSIncrementAtomic(&(pm->useCount));
384 }
385
386 /*
387 * perf_monitor_deallocate decrements the reference count for the given
388 * perf_monitor_t. If the reference count hits 0, the object is released back
389 * to the perf_small_zone via a call to perf_monitor_free().
390 */
391 static void perf_monitor_deallocate(perf_monitor_t pm) {
392 assert(pm);
393
394 /* If we just removed the last reference count */
395 if(1 == OSDecrementAtomic(&(pm->useCount))) {
396 /* Free the object */
397 perf_monitor_free(pm);
398 }
399 }
400
401 /*
402 * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the
403 * given C++ object pointer that was used when registering with the subsystem.
404 *
405 * If found, the method returns the perf_monitor_t with an extra reference
406 * placed on the object (or NULL if not
407 * found).
408 *
409 * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after
410 * calling perf_monitor_find.
411 */
412 static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
413 assert(monitor);
414 perf_monitor_t element = NULL;
415 perf_monitor_t found = NULL;
416
417 lck_spin_lock(&perf_monitor_queue_spin);
418
419 queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
420 if(element && element->object == monitor) {
421 /* We found it - reference the object. */
422 perf_monitor_reference(element);
423 found = element;
424 break;
425 }
426 }
427
428 lck_spin_unlock(&perf_monitor_queue_spin);
429
430 return found;
431 }
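/*
 * Illustrative sketch (not a KPI defined by this file): callers are expected
 * to pair perf_monitor_find() with perf_monitor_deallocate() so that the
 * extra reference taken on their behalf is dropped, e.g.:
 *
 *	perf_monitor_t pm = perf_monitor_find(monitor);
 *	if (pm != NULL) {
 *		... use pm while holding the extra reference ...
 *		perf_monitor_deallocate(pm);
 *	}
 */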
432
433 /*
434 * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
435 * associated with.
436 */
437 static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
438 assert(pm);
439 assert(pmc);
440
441 /* Today, we merely add a reference count now that a new pmc is attached */
442 perf_monitor_reference(pm);
443 }
444
445 /*
446 * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf
447 * monitor it is associated with.
448 */
449 static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
450 assert(pm);
451 assert(pmc);
452
453 /* Today, we merely remove a reference count now that the pmc is detached */
454 perf_monitor_deallocate(pm);
455 }
456
457 /*
458 * Perf Counter internals
459 */
460
461 static pmc_t pmc_alloc(void) {
462 return (pmc_t)zalloc(perf_big_zone);
463 }
464
465 static void pmc_free(void *pmc) {
466 zfree(perf_big_zone, pmc);
467 }
468
469 /*
470 * pmc_init initializes a newly allocated pmc_t
471 */
472 static void pmc_init(pmc_t pmc) {
473 assert(pmc);
474
475 pmc->object = NULL;
476 pmc->monitor = NULL;
477
478 bzero(&pmc->methods, sizeof(pmc_methods_t));
479
480 /* One reference for the caller */
481 pmc->useCount = 1;
482 }
483
484 /*
485 * pmc_reference increments the reference count of the given pmc_t
486 */
487 static void pmc_reference(pmc_t pmc) {
488 assert(pmc);
489
490 OSIncrementAtomic(&(pmc->useCount));
491 }
492
493 /*
494 * pmc_deallocate decrements the reference count of the given pmc_t. If the
495 * reference count hits zero, the given pmc_t is deallocated and released back
496 * to the allocation zone.
497 */
498 static void pmc_deallocate(pmc_t pmc) {
499 assert(pmc);
500
501 /* If we just removed the last reference count */
502 if(1 == OSDecrementAtomic(&(pmc->useCount))) {
503 /* Free the pmc */
504 pmc_free(pmc);
505 }
506 }
507
508 /*
509 * pmc_dequeue removes the given, newly *un*registered pmc from the
510 * perf_counters_queue.
511 */
512 static void pmc_dequeue(pmc_t pmc) {
513 lck_spin_lock(&perf_counters_queue_spin);
514
515 queue_remove(perf_counters_queue, pmc, pmc_t, link);
516
517 perf_counters_count--;
518
519 lck_spin_unlock(&perf_counters_queue_spin);
520 }
521
522 /*
523 * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue
524 */
525 static void pmc_enqueue(pmc_t pmc) {
526 lck_spin_lock(&perf_counters_queue_spin);
527
528 queue_enter(perf_counters_queue, pmc, pmc_t, link);
529
530 perf_counters_count++;
531
532 lck_spin_unlock(&perf_counters_queue_spin);
533 }
534
535 /*
536 * pmc_find attempts to locate a pmc_t that was registered with the given
537 * pmc_object_t pointer. If found, it returns the pmc_t with an extra reference
538 * which must be dropped by the caller by calling pmc_deallocate().
539 */
540 static pmc_t pmc_find(pmc_object_t object) {
541 assert(object);
542
543 lck_spin_lock(&perf_counters_queue_spin);
544
545 pmc_t element = NULL;
546 pmc_t found = NULL;
547
548 queue_iterate(perf_counters_queue, element, pmc_t, link) {
549 if(element && element->object == object) {
550 pmc_reference(element);
551
552 found = element;
553 break;
554 }
555 }
556
557 lck_spin_unlock(&perf_counters_queue_spin);
558
559 return found;
560 }
561
562 /*
563 * Config internals
564 */
565
566 /* Allocate a pmc_config_t */
567 static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) {
568 return (pmc_config_t)zalloc(perf_small_zone);
569 }
570
571 /* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */
572 static void pmc_config_free(pmc_t pmc, pmc_config_t config) {
573 assert(pmc);
574 assert(config);
575
576 if(config->object) {
577 pmc->methods.free_config(pmc->object, config->object);
578 config->object = NULL;
579 }
580
581 zfree(perf_small_zone, config);
582 }
583
584 static kern_return_t pmc_open(pmc_t pmc) {
585 assert(pmc);
586 assert(pmc->object);
587 assert(pmc->open_object);
588
589 return pmc->methods.open(pmc->object, pmc->open_object);
590 }
591
592 static kern_return_t pmc_close(pmc_t pmc) {
593 assert(pmc);
594 assert(pmc->object);
595 assert(pmc->open_object);
596
597 return pmc->methods.close(pmc->object, pmc->open_object);
598 }
599
600 /*
601 * Reservation Internals
602 */
603
604 static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc);
605 static void pmc_internal_reservation_store(pmc_reservation_t reservation);
606 static void pmc_internal_reservation_load(pmc_reservation_t reservation);
607
608 static pmc_reservation_t reservation_alloc(void) {
609 /* pmc reservations come from the perf small zone */
610 return (pmc_reservation_t)zalloc(perf_small_zone);
611 }
612
613 /*
614 * reservation_free deallocates and releases all resources associated with the
615 * given pmc_reservation_t. This includes freeing the config used to create the
616 * reservation, decrementing the reference count for the pmc used to create the
617 * reservation, and deallocating the reservation's memory.
618 */
619 static void reservation_free(pmc_reservation_t resv) {
620 /* Free config */
621 if(resv->config) {
622 assert(resv->pmc);
623
624 pmc_free_config(resv->pmc, resv->config);
625
626 resv->config = NULL;
627 }
628
629 /* release PMC */
630 (void)pmc_internal_reservation_set_pmc(resv, NULL);
631
632 /* Free reservation */
633 zfree(perf_small_zone, resv);
634 }
635
636 /*
637 * reservation_init initializes a newly created reservation.
638 */
639 static void reservation_init(pmc_reservation_t resv) {
640 assert(resv);
641
642 resv->pmc = NULL;
643 resv->config = NULL;
644 resv->value = 0ULL;
645
646 resv->flags = 0U;
647 resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
648 resv->active_last_context_in = 0U;
649
650 /*
651 * Since this member is a union, we only need to set either the task
652 * or thread to NULL.
653 */
654 resv->task = TASK_NULL;
655 }
656
657 /*
658 * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If
659 * there was one set already, it is deallocated (reference is dropped) before
660 * the new one is set. This method increases the reference count of the given
661 * pmc_t.
662 *
663 * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of
664 * dropping the reference on any previously set pmc, and setting the reservation
665 * to having no pmc set.
666 */
667 static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) {
668 assert(resv);
669
670 if(resv->pmc) {
671 (void)pmc_close(resv->pmc);
672 pmc_deallocate(resv->pmc);
673 resv->pmc = NULL;
674 }
675
676 resv->pmc = pmc;
677
678 if(resv->pmc) {
679 pmc_reference(resv->pmc);
680 if(KERN_SUCCESS != pmc_open(resv->pmc)) {
681 pmc_deallocate(resv->pmc);
682 resv->pmc = NULL;
683
684 return KERN_FAILURE;
685 }
686 }
687
688 return KERN_SUCCESS;
689 }
690
691 /*
692 * Used to place reservation into one of the system, task, and thread queues
693 * Assumes the queue's spin lock is already held.
694 */
695 static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) {
696 assert(queue);
697 assert(resv);
698
699 queue_enter(queue, resv, pmc_reservation_t, link);
700 }
701
702 static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) {
703 assert(queue);
704 assert(resv);
705
706 queue_remove(queue, resv, pmc_reservation_t, link);
707 }
708
709 /* Returns TRUE if the reservation applies to the current execution context */
710 static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) {
711 boolean_t ret = FALSE;
712 assert(resv);
713
714 if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) {
715 ret = TRUE;
716 } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) {
717 if(current_task() == resv->task) {
718 ret = TRUE;
719 }
720 } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) {
721 if(current_thread() == resv->thread) {
722 ret = TRUE;
723 }
724 }
725
726 return ret;
727 }
728
729 /*
730 * pmc_accessible_core_count returns the number of logical cores that can access
731 * a given @pmc. 0 means every core in the system.
732 */
733 static uint32_t pmc_accessible_core_count(pmc_t pmc) {
734 assert(pmc);
735
736 uint32_t *cores = NULL;
737 size_t coreCt = 0UL;
738
739 if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object,
740 &cores, &coreCt)) {
741 coreCt = 0U;
742 }
743
744 return (uint32_t)coreCt;
745 }
746
747 /* spin lock for the queue must already be held */
748 /*
749 * This method inspects each reservation in the given queue, looking for one that
750 * uses the same PMC as the incoming reservation and conflicts with it (scope,
751 * task/thread, and core accessibility are compared). Returns TRUE if a conflict is found.
752 */
753 static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t
754 resv) {
755 assert(queue);
756 assert(resv);
757
758 boolean_t ret = FALSE;
759 pmc_reservation_t tmp = NULL;
760
761 queue_iterate(queue, tmp, pmc_reservation_t, link) {
762 if(tmp) {
763 if(tmp->pmc == resv->pmc) {
764 /* PMC matches - make sure scope matches first */
765 switch(PMC_FLAG_SCOPE(tmp->flags)) {
766 case PMC_FLAG_SCOPE_SYSTEM:
767 /*
768 * Found a reservation in system queue with same pmc - always a
769 * conflict.
770 */
771 ret = TRUE;
772 break;
773 case PMC_FLAG_SCOPE_THREAD:
774 /*
775 * Found one in thread queue with the same PMC as the
776 * argument. Only a conflict if argument scope isn't
777 * thread or system, or the threads match.
778 */
779 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) ||
780 (tmp->thread == resv->thread);
781
782 if(!ret) {
783 /*
784 * so far, no conflict - check that the pmc that is
785 * being reserved isn't accessible from more than
786 * one core; if it is, we need to say it's already
787 * taken.
788 */
789 if(1 != pmc_accessible_core_count(tmp->pmc)) {
790 ret = TRUE;
791 }
792 }
793 break;
794 case PMC_FLAG_SCOPE_TASK:
795 /*
796 * Follow similar semantics for task scope.
797 */
798
799 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
800 (tmp->task == resv->task);
801 if(!ret) {
802 /*
803 * so far, no conflict - check that the pmc that is
804 * being reserved isn't accessible from more than
805 * one core; if it is, we need to say it's already
806 * taken.
807 */
808 if(1 != pmc_accessible_core_count(tmp->pmc)) {
809 ret = TRUE;
810 }
811 }
812
813 break;
814 }
815
816 if(ret) break;
817 }
818 }
819 }
820
821 return ret;
822 }
823
824 /*
825 * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be
826 * added to its target queue without creating conflicts (target queue is
827 * determined by the reservation's scope flags). Further, this method returns
828 * FALSE if any level contains a reservation for a PMC that can be accessed from
829 * more than just 1 core, and the given reservation also wants the same PMC.
830 */
831 static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) {
832 assert(resv);
833 boolean_t ret = TRUE;
834
835 if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) ||
836 pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) ||
837 pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) {
838 ret = FALSE;
839 }
840
841 return ret;
842 }
843
844 static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) {
845 assert(thread);
846
847 /* See if this thread needs its PMC flag set */
848 pmc_reservation_t tmp = NULL;
849
850 if(!newFlag) {
851 /*
852 * If the parent task just dropped its reservation, iterate the thread
853 * reservations to see if we need to keep the pmc flag set for the given
854 * thread or not.
855 */
856 lck_spin_lock(&reservations_spin);
857
858 queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) {
859 if(tmp->thread == thread) {
860 newFlag = TRUE;
861 break;
862 }
863 }
864
865 lck_spin_unlock(&reservations_spin);
866 }
867
868 if(newFlag) {
869 OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud);
870 } else {
871 OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud);
872 }
873 }
874
875 /*
876 * This operation is (worst case) O(N*M) where N is number of threads in the
877 * given task, and M is the number of thread reservations in our system.
878 */
879 static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
880 assert(task);
881 thread_t thread = NULL;
882
883 if(newFlag) {
884 OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
885 } else {
886 OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
887 }
888
889 task_lock(task);
890
891 queue_iterate(&task->threads, thread, thread_t, task_threads) {
892 /* propagate the task's mask down to each thread */
893 pmc_internal_update_thread_flag(thread, newFlag);
894 }
895
896 task_unlock(task);
897 }
898
899 /*
900 * pmc_internal_reservation_add adds a reservation to the global tracking queues after
901 * ensuring there are no reservation conflicts. To do this, it takes the
902 * reservation queues' spin lock (to ensure no other core goes and adds a
903 * reservation for the same pmc to a queue that has already been checked).
904 */
905 static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
906 assert(resv);
907
908 boolean_t ret = FALSE;
909
910 /* one spin lock protects all three queues; hold it for the whole check-and-add */
911 lck_spin_lock(&reservations_spin);
912
913 /* Check if the reservation can be added without conflicts */
914 if(pmc_internal_reservation_validate_for_pmc(resv)) {
915 ret = TRUE;
916 }
917
918 if(ret) {
919 /* add reservation to appropriate scope */
920 switch(PMC_FLAG_SCOPE(resv->flags)) {
921
922 /* System-wide counter */
923 case PMC_FLAG_SCOPE_SYSTEM:
924 /* Simply add it to the system queue */
925 pmc_internal_reservation_enqueue(system_reservations, resv);
926 system_reservation_count++;
927
928 lck_spin_unlock(&reservations_spin);
929
930 break;
931
932 /* Task-switched counter */
933 case PMC_FLAG_SCOPE_TASK:
934 assert(resv->task);
935
936 /* Enqueue it in our local queue for tracking, then update the task flag below */
937 pmc_internal_reservation_enqueue(task_reservations, resv);
938 task_reservation_count++;
939
940 lck_spin_unlock(&reservations_spin);
941
942 /* update the task mask, and propagate it to existing threads */
943 pmc_internal_update_task_flag(resv->task, TRUE);
944 break;
945
946 /* Thread-switched counter */
947 case PMC_FLAG_SCOPE_THREAD:
948 assert(resv->thread);
949
950 /*
951 * Works the same as a task-switched counter, only at
952 * thread-scope
953 */
954
955 pmc_internal_reservation_enqueue(thread_reservations, resv);
956 thread_reservation_count++;
957
958 lck_spin_unlock(&reservations_spin);
959
960 pmc_internal_update_thread_flag(resv->thread, TRUE);
961 break;
962 }
963 } else {
964 lck_spin_unlock(&reservations_spin);
965 }
966
967 return ret;
968 }
969
970 static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
971 uint32_t * cores;
972 size_t core_cnt;
973
974 /* Get the list of accessible cores */
975 if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
976 boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);
977
978 /* Fast case: the PMC is only accessible from one core and we happen to be on it */
979 if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
980 action_func(reservation);
981 } else {
982 /* Call action_func on every accessible core */
983 #if defined(__i386__) || defined(__x86_64__)
984 size_t ii;
985 cpumask_t mask = 0;
986
987 /* Build a mask for the accessible cores */
988 if (core_cnt > 0) {
989 for (ii = 0; ii < core_cnt; ii++) {
990 mask |= cpu_to_cpumask(cores[ii]);
991 }
992 } else {
993 /* core_cnt = 0 really means all cpus */
994 mask = CPUMASK_ALL;
995 }
996
997 /* Have each accessible core run action_func asynchronously. */
998 mp_cpus_call(mask, ASYNC, action_func, reservation);
999 #else
1000 #error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture
1001 #endif
1002 }
1003
1004 ml_set_interrupts_enabled(intrs_enabled);
1005 }
1006
1007 }
1008
1009 /*
1010 * pmc_internal_reservation_remove removes the given reservation from the appropriate
1011 * reservation queue according to its scope.
1012 *
1013 * NOTE: The scope flag must have been set for this method to function.
1014 */
1015 static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
1016 assert(resv);
1017
1018 /*
1019 * Due to the way the macros are written, we can't just blindly queue-remove
1020 * the reservation without knowing which queue it's in. We figure this out
1021 * using the reservation's scope flags.
1022 */
1023
1024 switch(PMC_FLAG_SCOPE(resv->flags)) {
1025
1026 case PMC_FLAG_SCOPE_SYSTEM:
1027 lck_spin_lock(&reservations_spin);
1028 pmc_internal_reservation_dequeue(system_reservations, resv);
1029 system_reservation_count--;
1030 lck_spin_unlock(&reservations_spin);
1031 break;
1032
1033 case PMC_FLAG_SCOPE_TASK:
1034
1035 /* Lock the global spin lock */
1036 lck_spin_lock(&reservations_spin);
1037
1038 /* remove from the global queue */
1039 pmc_internal_reservation_dequeue(task_reservations, resv);
1040 task_reservation_count--;
1041
1042 /* unlock the global */
1043 lck_spin_unlock(&reservations_spin);
1044
1045 /* Recalculate task's counter mask */
1046 pmc_internal_update_task_flag(resv->task, FALSE);
1047 break;
1048
1049 case PMC_FLAG_SCOPE_THREAD:
1050 lck_spin_lock(&reservations_spin);
1051
1052 pmc_internal_reservation_dequeue(thread_reservations, resv);
1053 thread_reservation_count--;
1054
1055 lck_spin_unlock(&reservations_spin);
1056
1057 /* recalculate the thread's counter mask */
1058 pmc_internal_update_thread_flag(resv->thread, FALSE);
1059
1060 break;
1061 }
1062 }
1063
1064 /* Reservation State Machine
1065 *
1066 * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a
1067 * set of 9 events to provide MP-safe bookkeeping and control flow. The 3-tuple is comprised
1068 * of a state, a count of active contexts, and a set of modifier flags. A state machine defines
1069 * the possible transitions at each event point given the current 3-tuple. Atomicity is handled
1070 * by reading the current 3-tuple, applying the transformations indicated by the state machine
1071 * and then attempting to OSCompareAndSwap the transformed value. If the OSCompareAndSwap fails,
1072 * the process is repeated until either the OSCompareAndSwap succeeds or no valid transitions are
1073 * available.
1074 *
1075 * The state machine is described using tuple notation for the current state and a related notation
1076 * for describing the transformations. For conciseness, the flag and state names are abbreviated as
1077 * follows:
1078 *
1079 * states:
1080 * S = STOP
1081 * CR = CAN_RUN
1082 * L = LOAD
1083 * R = RUN
1084 * ST = STORE
1085 * I = INTERRUPT
1086 * D = DEALLOC
1087 *
1088 * flags:
1089 *
1090 * S = STOPPING
1091 * D = DEALLOCING
1092 * I = INTERRUPTING
1093 *
1094 * The tuple notation is formed from the following pattern:
1095 *
1096 * tuple = < state, active-context-count, flags >
1097 * state = S | CR | L | R | ST | I | D
1098 * active-context-count = 0 | >0 | 1 | >1
1099 * flags = flags flag | blank
1100 * flag = S | D | I
1101 *
1102 * The transform notation is similar, but only describes the modifications made to the current state.
1103 * The notation is formed from the following pattern:
1104 *
1105 * transform = < state, active-context-count, flags >
1106 * state = S | CR | L | R | ST | I | D
1107 * active-context-count = + | - | blank
1108 * flags = flags flag | flags !flag | blank
1109 * flag = S | D | I
1110 *
1111 * And now for the state machine:
1112 * State Start Stop Free Interrupt End Interrupt Context In Context Out Load Finished Store Finished
1113 * <CR, 0, > <S, , > <D, , > <L, +, >
1114 * <D, 0, >
1115 * <D, 1, D> < , -, !D>
1116 * <D, >1, D> < , -, >
1117 * <I, 0, D> <D, , !D>
1118 * <I, 0, S> < , , !S> < , , !SD> <S, , !S>
1119 * <I, 0, > < , , S> < , , D> <CR, , >
1120 * <L, 1, D> <ST, -, >
1121 * <L, 1, ID> <ST, -, >
1122 * <L, 1, IS> < , , !SD> <ST, -, >
1123 * <L, 1, S> < , , !S> < , , !SD> <ST, -, >
1124 * <L, 1, > < , , S> < , , D> < , , IS> < , +, > <R, , >
1125 * <L, >1, D> < , -, > <R, -, >
1126 * <L, >1, ID> < , -, > <R, -, >
1127 * <L, >1, IS> < , , !SD> < , -, > <R, -, >
1128 * <L, >1, S> < , , !S> < , , !SD> < , -, > <R, -, >
1129 * <L, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, > <R, , >
1130 * <R, 1, D> <ST, -, >
1131 * <R, 1, ID> <ST, -, >
1132 * <R, 1, IS> < , , !SD> <ST, -, >
1133 * <R, 1, S> < , , !S> < , , !SD> <ST, -, >
1134 * <R, 1, > < , , S> < , , D> < , , IS> < , +, > <ST, -, >
1135 * <R, >1, D> < , -, >
1136 * <R, >1, ID> < , -, >
1137 * <R, >1, IS> < , , !SD> < , -, >
1138 * <R, >1, S> < , , !S> < , , !SD> < , -, >
1139 * <R, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, >
1140 * <S, 0, > <CR, , > <D, , >
1141 * <S, 1, ID> <I, -, !I>
1142 * <S, 1, IS> < , , !SD> <I, -, !I>
1143 * <S, 1, S> < , , !S> <D, , !SD> < , -, !S>
1144 * <S, 1, > < , , S> <D, , D> <L, +, > <CR, -, >
1145 * <S, >1, ID> < , -, >
1146 * <S, >1, IS> < , , !SD> < , -, >
1147 * <S, >1, S> < , , !S> <D, , !SD> < , -, >
1148 * <S, >1, > < , , S> <D, , D> <L, +, > < , -, >
1149 * <ST, 0, D> <D, , !D>
1150 * <ST, 0, ID> <I, , !I>
1151 * <ST, 0, IS> < , , !SD> <I, , !I>
1152 * <ST, 0, S> < , , !S> < , , !SD> <S, , !S>
1153 * <ST, 0, > < , , S> < , , D> < , , IS> < , +, > <CR, , >
1154 * <ST, >0, D> < , -, > <D, , >
1155 * <ST, >0, ID> < , -, > <S, , >
1156 * <ST, >0, IS> < , , !SD> < , -, > <S, , >
1157 * <ST, >0, S> < , , !S> < , , !SD> < , -, > <S, , >
1158 * <ST, >0, > < , , S> < , , D> < , , IS> < , +, > < , -, > <L, , >
1159 */
1160
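/*
 * Example reading of the table above (illustrative, matching the transitions
 * implemented in pmc_internal_reservation_next_state() below): row <S, 1, >
 * is a stopped reservation with one active context and no flags.  A Context In
 * event applies <L, +, >, moving it to LOAD with a context count of 2; a
 * Context Out event applies <CR, -, >, moving it to CAN_RUN with a context
 * count of 0.
 */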
1161 static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) {
1162 uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0);
1163
1164 switch (event) {
1165 case PMC_STATE_EVENT_START:
1166 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1167 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1168 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1169 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1170 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1171 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1172 new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING);
1173 break;
1174 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1175 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1176 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1177 }
1178 break;
1179 }
1180 break;
1181 case PMC_STATE_EVENT_STOP:
1182 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1183 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1184 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1185 break;
1186 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1187 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1188 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1189 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1190 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
1191 break;
1192 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1193 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1194 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
1195 }
1196 break;
1197 }
1198 break;
1199 case PMC_STATE_EVENT_FREE:
1200 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1201 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1202 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1203 break;
1204 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1205 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1206 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1207 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1208 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1209 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1210 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1211 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1212 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
1213 break;
1214 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1215 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1216 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1217 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1218 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
1219 break;
1220 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1221 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
1222 break;
1223 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1224 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1225 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
1226 } else {
1227 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1228 }
1229 break;
1230 }
1231 break;
1232 case PMC_STATE_EVENT_INTERRUPT:
1233 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1234 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1235 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1236 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1237 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0);
1238 break;
1239 }
1240 break;
1241 case PMC_STATE_EVENT_END_OF_INTERRUPT:
1242 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1243 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING):
1244 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
1245 break;
1246 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
1247 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
1248 break;
1249 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
1250 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1251 break;
1252 }
1253 break;
1254 case PMC_STATE_EVENT_CONTEXT_IN:
1255 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1256 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
1257 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
1258 break;
1259 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1260 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1261 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1262 new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0);
1263 break;
1264 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1265 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1266 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
1267 }
1268 break;
1269 }
1270 break;
1271 case PMC_STATE_EVENT_CONTEXT_OUT:
1272 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1273 case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING):
1274 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1275 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING);
1276 } else {
1277 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1278 }
1279 break;
1280 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
1281 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1282 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1283 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1284 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1285 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1286 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1287 }
1288 break;
1289 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING):
1290 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1291 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1292 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
1293 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
1294 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1295 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
1296 } else {
1297 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1298 }
1299 break;
1300 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1301 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1302 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1303 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING);
1304 } else {
1305 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1306 }
1307 break;
1308 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
1309 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1310 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING);
1311 } else {
1312 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1313 }
1314 break;
1315 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
1316 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1317 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
1318 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0);
1319 } else {
1320 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1321 }
1322 }
1323 break;
1324 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
1325 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1326 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1327 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1328 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1329 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
1330 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
1331 }
1332 break;
1333 }
1334 break;
1335 case PMC_STATE_EVENT_LOAD_FINISHED:
1336 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1337 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
1338 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1339 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1340 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
1341 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
1342 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0);
1343 } else {
1344 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
1345 }
1346 break;
1347 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
1348 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0);
1349 break;
1350 }
1351 break;
1352 case PMC_STATE_EVENT_STORE_FINISHED:
1353 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
1354 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
1355 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1356 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
1357 } else {
1358 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
1359 }
1360 break;
1361 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
1362 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
1363 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1364 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING);
1365 } else {
1366 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1367 }
1368 break;
1369 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
1370 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1371 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
1372 } else {
1373 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
1374 }
1375 break;
1376 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
1377 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
1378 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
1379 } else {
1380 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0);
1381 }
1382 break;
1383 }
1384 break;
1385 }
1386
1387 return new_state;
1388 }
1389
1390 static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) {
1391 pmc_state_t oldState;
1392 pmc_state_t newState;
1393
1394 assert(reservation);
1395
1396 /* Determine what state change, if any, we need to do. Keep trying until either we succeed in making a transition
1397 * or there is no valid move.
1398 */
1399 do {
1400 oldState = reservation->state;
1401 newState = pmc_internal_reservation_next_state(oldState, event);
1402 } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state)));
1403
1404 if (newState != PMC_STATE_INVALID) {
1405 COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event));
1406 } else {
1407 COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event));
1408 }
1409
1410 if (old_state_out != NULL) {
1411 *old_state_out = oldState;
1412 }
1413
1414 return newState;
1415 }
1416
1417 static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) {
1418 assert(reservation);
1419 pmc_state_t newState;
1420 pmc_state_t oldState;
1421
1422 /* Clear that this reservation was active when this cpu did its last context in */
1423 OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in));
1424
1425 /* Move the state machine */
1426 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) {
1427 return;
1428 }
1429
1430 /* Do any actions required based on the state change */
1431 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) {
1432 /* Just moved into STORE, so store the reservation. */
1433 pmc_internal_reservation_store(reservation);
1434 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1435 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1436 thread_wakeup((event_t)reservation);
1437 }
1438
1439 }
1440
1441 static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) {
1442 assert(reservation);
1443 pmc_state_t oldState;
1444 pmc_state_t newState;
1445
1446 /* Move the state machine */
1447 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) {
1448 return;
1449 }
1450
1451 /* Mark that the reservation was active when this cpu did its last context in */
1452 OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
1453
1454 /* Do any actions required based on the state change */
1455 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) {
1456 /* Just moved into LOAD, so load the reservation. */
1457 pmc_internal_reservation_load(reservation);
1458 }
1459
1460 }
1461
1462 static void pmc_internal_reservation_store(pmc_reservation_t reservation) {
1463 assert(reservation);
1464 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE);
1465
1466 assert(reservation->pmc);
1467 assert(reservation->config);
1468
1469 pmc_state_t newState;
1470 kern_return_t ret = KERN_SUCCESS;
1471
1472 pmc_t store_pmc = reservation->pmc;
1473 pmc_object_t store_pmc_obj = store_pmc->object;
1474 perf_monitor_t store_pm = store_pmc->monitor;
1475
1476 /*
1477 * Instruct the Perf Monitor that contains this counter to turn
1478 * off the monitor-level (global) enable for this counter.
1479 */
1480 ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1);
1481 if(KERN_SUCCESS != ret) {
1482 COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret);
1483 return;
1484 }
1485
1486 /* Instruct the counter to disable itself */
1487 ret = store_pmc->methods.disable(store_pmc_obj);
1488 if(KERN_SUCCESS != ret) {
1489 COUNTER_DEBUG(" [error] disable: 0x%x\n", ret);
1490 }
1491
1492 /*
1493 * At this point, we're off the hardware, so we don't have to
1494 * set_on_hardware(TRUE) if anything fails from here on.
1495 */
1496
1497 /* store the counter value into the reservation's stored count */
1498 ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value);
1499 if(KERN_SUCCESS != ret) {
1500 COUNTER_DEBUG(" [error] get_count: 0x%x\n", ret);
1501 return;
1502 }
1503
1504 /* Advance the state machine now that the STORE is finished */
1505 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) {
1506 return;
1507 }
1508
1509 /* Do any actions required based on the state change */
1510 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) {
1511 /* Just moved into LOAD, so load the reservation. */
1512 pmc_internal_reservation_load(reservation);
1513 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1514 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1515 thread_wakeup((event_t)reservation);
1516 }
1517
1518 }
1519
1520 static void pmc_internal_reservation_load(pmc_reservation_t reservation) {
1521 assert(reservation);
1522 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD);
1523
1524 pmc_state_t newState;
1525 kern_return_t ret = KERN_SUCCESS;
1526
1527 assert(reservation->pmc);
1528 assert(reservation->config);
1529
1530 pmc_t load_pmc = reservation->pmc;
1531 pmc_object_t load_pmc_obj = load_pmc->object;
1532 perf_monitor_t load_pm = load_pmc->monitor;
1533
1534 /* Set the control register up with the stored configuration */
1535 ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object);
1536 if(KERN_SUCCESS != ret) {
1537 COUNTER_DEBUG(" [error] set_config: 0x%x\n", ret);
1538 return;
1539 }
1540
1541 /* load the counter value */
1542 ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value);
1543 if(KERN_SUCCESS != ret) {
1544 COUNTER_DEBUG(" [error] set_count: 0x%x\n", ret);
1545 return;
1546 }
1547
1548 /* Locally enable the counter */
1549 ret = load_pmc->methods.enable(load_pmc_obj);
1550 if(KERN_SUCCESS != ret) {
1551 COUNTER_DEBUG(" [error] enable: 0x%x\n", ret);
1552 return;
1553 }
1554
1555 /*
1556 * Instruct the Perf Monitor containing the pmc to enable the
1557 * counter.
1558 */
1559 ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1);
1560 if(KERN_SUCCESS != ret) {
1561 COUNTER_DEBUG(" [error] enable_counters: 0x%x\n", ret);
1562 /* not on the hardware. */
1563 return;
1564 }
1565
1566 /* Advance the state machine now that the LOAD is finished */
1567 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) {
1568 return;
1569 }
1570
1571 /* Do any actions required based on the state change */
1572 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) {
1573 /* Just moved into STORE, so store the reservation. */
1574 pmc_internal_reservation_store(reservation);
1575 }
1576
1577 }
1578
1579 static void pmc_internal_reservation_start_cpu(void * arg) {
1580 pmc_reservation_t reservation = (pmc_reservation_t)arg;
1581
1582 assert(reservation);
1583
1584 if (pmc_internal_reservation_matches_context(reservation)) {
1585 /* We are in context, but the reservation may have already had the context_in method run. Attempt
1586 * to set this cpu's bit in the active_last_context_in mask. If we set it, call context_in.
1587 */
1588 uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
1589
1590 if ((oldMask & (1U << cpu_number())) == 0) {
1591 COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number());
1592
1593 pmc_internal_reservation_context_in(reservation);
1594 }
1595 }
1596 }
1597
1598 static void pmc_internal_reservation_stop_cpu(void * arg) {
1599 pmc_reservation_t reservation = (pmc_reservation_t)arg;
1600
1601 assert(reservation);
1602
1603 if (pmc_internal_reservation_matches_context(reservation)) {
1604 COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number());
1605
1606 pmc_internal_reservation_context_out(reservation);
1607 }
1608 }
1609
1610 /*!fn
1611 * pmc_reservation_interrupt is called when a PMC reservation which was set up
1612 * with an interrupt threshold counts the requested number of events. When the
1613 * underlying counter hits the threshold, an interrupt is generated, and this
1614 * method is called. This method marks the reservation as stopped, and passes
1615 * control off to the user-registered callback method, along with the
1616 * reservation (so that the user can, for example, write a 0 to the counter, and
1617 * restart the reservation).
1618 * This method assumes the reservation has a valid pmc_config_t within.
1619 *
1620 * @param target The pmc_reservation_t that caused the interrupt.
1621 * @param refCon User specified reference constant.
1622 */
1623 static void pmc_reservation_interrupt(void *target, void *refCon) {
1624 pmc_reservation_t reservation = (pmc_reservation_t)target;
1625 pmc_state_t newState;
1626 uint64_t timeout;
1627 uint32_t spins;
1628
1629 assert(reservation);
1630
1631 /* Move the state machine */
1632 if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) {
1633 return;
1634 }
1635
1636 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
1637 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
1638 * on every cpu that can access the PMC.
1639 */
1640 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
1641
1642 /* Spin waiting for the state to turn to INTERRUPT */
1643 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
1644 timeout += mach_absolute_time();
1645 spins = 0;
1646 while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) {
1647 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
1648 if (++spins > PMC_SPIN_THRESHOLD) {
1649 if (mach_absolute_time() > timeout) {
1650 pmc_spin_timeout_count++;
1651 assert(0);
1652 }
1653 }
1654
1655 cpu_pause();
1656 }
1657
1658 assert(reservation->config);
1659 assert(reservation->config->method);
1660
1661 /* Call the registered callback handler */
1662 #if DEBUG_COUNTERS
1663 uint64_t start = mach_absolute_time();
1664 #endif /* DEBUG_COUNTERS */
1665
1666 (void)reservation->config->method(reservation, refCon);
1667
1668 #if DEBUG_COUNTERS
1669 uint64_t end = mach_absolute_time();
1670 if((end - start) > 5000ULL) {
1671 kprintf("%s - user method %p took %llu ns\n", __FUNCTION__,
1672 reservation->config->method, (end - start));
1673 }
1674 #endif
1675
1676 /* Move the state machine */
1677 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) {
1678 return;
1679 }
1680
1681 /* Do any post-move actions necessary */
1682 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) {
1683 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
1684 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
1685 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
1686 thread_wakeup((event_t)reservation);
1687 }
1688 }
1689
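/*
 * Illustrative sketch of a user-registered PMI handler of the kind described
 * above: it rearms the counter by writing a 0 and restarting the reservation.
 * The my_-prefixed name is hypothetical, and the prototype is assumed to match
 * pmc_interrupt_method_t as declared in pmc.h (per the comment above, the
 * first argument is the pmc_reservation_t that generated the interrupt).
 *
 *	static void my_pmi_handler(void *target, void *refCon)
 *	{
 *		pmc_reservation_t resv = (pmc_reservation_t)target;
 *
 *		(void)refCon;
 *
 *		(void)pmc_reservation_write(resv, 0ULL);	// clear the count
 *		(void)pmc_reservation_start(resv);		// resume counting
 *	}
 */
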
1690 /*
1691 * Apple-private KPI for Apple kexts (IOProfileFamily) only
1692 */
1693
1694 #if 0
1695 #pragma mark -
1696 #pragma mark IOProfileFamily private KPI
1697 #endif
1698
1699 /*
1700 * perf_monitor_register registers a new Performance Monitor, and its associated
1701 * callback methods. The given perf_monitor_object_t is the first argument to
1702 * each callback when they are called.
1703 */
1704 kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
1705 perf_monitor_methods_t *methods) {
1706
1707 COUNTER_DEBUG("registering perf monitor %p\n", monitor);
1708
1709 if(!monitor || !methods) {
1710 return KERN_INVALID_ARGUMENT;
1711 }
1712
1713 /* Protect against out-of-date driver kexts */
1714 if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) {
1715 return KERN_INVALID_ARGUMENT;
1716 }
1717
1718 /* All methods are required */
1719 if(!methods->supports_context_switching || !methods->enable_counters ||
1720 !methods->disable_counters) {
1721 return KERN_INVALID_ARGUMENT;
1722 }
1723
1724 /* prevent dupes. */
1725 perf_monitor_t dupe = perf_monitor_find(monitor);
1726 if(dupe) {
1727 COUNTER_DEBUG("Duplicate registration for %p\n", monitor);
1728 perf_monitor_deallocate(dupe);
1729 return KERN_FAILURE;
1730 }
1731
1732 perf_monitor_t pm = perf_monitor_alloc();
1733 if(!pm) {
1734 return KERN_RESOURCE_SHORTAGE;
1735 }
1736
1737 /* initialize the object */
1738 perf_monitor_init(pm);
1739
1740 /* copy in the registration info */
1741 pm->object = monitor;
1742 memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t));
1743
1744 /* place it in the tracking queue */
1745 perf_monitor_enqueue(pm);
1746
1747 /* debug it */
1748 PRINT_PERF_MON(pm);
1749
1750 return KERN_SUCCESS;
1751 }
1752
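/*
 * Illustrative sketch of a driver-side registration call. The my_-prefixed
 * names are hypothetical, and the callback prototypes are assumed to match the
 * corresponding members of perf_monitor_methods_t in pmc.h; all three
 * callbacks must be non-NULL or the registration is rejected.
 *
 *	static perf_monitor_methods_t my_pm_methods = {
 *		.perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION,
 *		.supports_context_switching = my_supports_context_switching,
 *		.enable_counters = my_enable_counters,
 *		.disable_counters = my_disable_counters,
 *	};
 *
 *	kern_return_t kr = perf_monitor_register(my_monitor_object, &my_pm_methods);
 */
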
1753 /*
1754 * perf_monitor_unregister unregisters a previously registered Perf Monitor,
1755 * looking it up by reference pointer (the same that was used in
1756 * perf_monitor_register()).
1757 */
1758 kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) {
1759 kern_return_t ret = KERN_FAILURE;
1760
1761 COUNTER_DEBUG("unregistering perf monitor %p\n", monitor);
1762
1763 if(!monitor) {
1764 return KERN_INVALID_ARGUMENT;
1765 }
1766
1767 perf_monitor_t pm = perf_monitor_find(monitor);
1768 if(pm) {
1769 /* Remove it from the queue. */
1770 perf_monitor_dequeue(pm);
1771
1772 /* drop extra retain from find */
1773 perf_monitor_deallocate(pm);
1774
1775 /* and release the object */
1776 perf_monitor_deallocate(pm);
1777
1778 ret = KERN_SUCCESS;
1779 } else {
1780 COUNTER_DEBUG("could not find a registered pm that matches!\n");
1781 }
1782
1783 return ret;
1784 }
1785
1786 /*
1787 * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is
1788 * associated with a Perf Monitor. Perf Monitors are looked up by the reference
1789 * pointer that was used to previously register them.
1790 *
1791 * PMCs are registered with a reference pointer (@pmc_object), and a set of
1792 * callback methods. When the given callback methods are called from xnu, the
1793 * first argument will always be the reference pointer used to register the PMC.
1794 *
1795 * NOTE: @monitor must have been successfully registered via
1796 * perf_monitor_register before this method will succeed.
1797 */
1798 kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object,
1799 pmc_methods_t *methods, void *object) {
1800
1801 COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
1802
1803 if(!monitor || !pmc_object || !methods || !object) {
1804 return KERN_INVALID_ARGUMENT;
1805 }
1806
1807 /* Prevent version mismatches */
1808 if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) {
1809 COUNTER_DEBUG("version mismatch\n");
1810 return KERN_INVALID_ARGUMENT;
1811 }
1812
1813 /* All methods are required. */
1814 if(!methods->create_config ||
1815 !methods->free_config ||
1816 !methods->config_set_value ||
1817 !methods->config_set_threshold ||
1818 !methods->config_set_handler ||
1819 !methods->set_config ||
1820 !methods->get_monitor ||
1821 !methods->get_name ||
1822 !methods->accessible_from_core ||
1823 !methods->accessible_cores ||
1824 !methods->get_count ||
1825 !methods->set_count ||
1826 !methods->disable ||
1827 !methods->enable ||
1828 !methods->open ||
1829 !methods->close) {
1830 return KERN_INVALID_ARGUMENT;
1831 }
1832
1833 /* make sure this perf monitor object is already registered */
1834 /*
1835 * NOTE: this adds a reference to the parent, so we'll have to drop it in
1836 * any failure code paths from here on out.
1837 */
1838 perf_monitor_t pm = perf_monitor_find(monitor);
1839 if(!pm) {
1840 COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor);
1841 return KERN_INVALID_ARGUMENT;
1842 }
1843
1844 /* make a new pmc */
1845 pmc_t pmc = pmc_alloc();
1846 if(!pmc) {
1847 /* drop the extra reference from perf_monitor_find() */
1848 perf_monitor_deallocate(pm);
1849 return KERN_RESOURCE_SHORTAGE;
1850 }
1851
1852 /* init it */
1853 pmc_init(pmc);
1854
1855 pmc->object = pmc_object;
1856 pmc->open_object = object;
1857
1858 /* copy the callbacks in */
1859 memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t));
1860
1861 pmc->monitor = pm;
1862
1863 perf_monitor_add_pmc(pmc->monitor, pmc);
1864
1865 /* enqueue it in our tracking queue */
1866 pmc_enqueue(pmc);
1867
1868 /* drop extra reference from perf_monitor_find() */
1869 perf_monitor_deallocate(pm);
1870
1871 return KERN_SUCCESS;
1872 }
1873
1874 /*
1875 * pmc_unregister unregisters a previously registered PMC, looking it up by
1876 * reference pointer to *both* the Perf Monitor it was created with, and the PMC's
1877 * reference pointer itself.
1878 */
1879 kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) {
1880 COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
1881
1882 if(!monitor || !pmc_object) {
1883 return KERN_INVALID_ARGUMENT;
1884 }
1885
1886 pmc_t pmc = pmc_find(pmc_object);
1887 if(!pmc) {
1888 COUNTER_DEBUG("Could not find a matching pmc.\n");
1889 return KERN_FAILURE;
1890 }
1891
1892 /* remove it from the global queue */
1893 pmc_dequeue(pmc);
1894
1895 perf_monitor_remove_pmc(pmc->monitor, pmc);
1896
1897 /* remove extra reference count from pmc_find() */
1898 pmc_deallocate(pmc);
1899
1900 /* dealloc the pmc */
1901 pmc_deallocate(pmc);
1902
1903 return KERN_SUCCESS;
1904 }
1905
1906 #if 0
1907 #pragma mark -
1908 #pragma mark KPI
1909 #endif
1910
1911 /*
1912 * Begin in-kernel and in-kext KPI methods
1913 */
1914
1915 /*
1916 * pmc_create_config creates a new configuration area from a given @pmc.
1917 *
1918 * NOTE: This method is not interrupt safe.
1919 */
1920 kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) {
1921 pmc_config_t tmp = NULL;
1922
1923 if(!pmc || !config) {
1924 return KERN_INVALID_ARGUMENT;
1925 }
1926
1927 pmc_reference(pmc);
1928
1929 tmp = pmc_config_alloc(pmc);
1930 if(tmp) {
1931 tmp->object = pmc->methods.create_config(pmc->object);
1932
1933 if(!tmp->object) {
1934 pmc_config_free(pmc, tmp);
1935 tmp = NULL;
1936 } else {
1937 tmp->interrupt_after_value = 0ULL;
1938 tmp->method = NULL;
1939 tmp->refCon = NULL;
1940 }
1941 }
1942
1943 pmc_deallocate(pmc);
1944
1945 if(!tmp) {
1946 return KERN_RESOURCE_SHORTAGE;
1947 }
1948
1949 *config = tmp;
1950
1951 return KERN_SUCCESS;
1952 }
1953
1954 /*
1955 * pmc_free_config frees a configuration area created from a given @pmc
1956 *
1957 * NOTE: This method is not interrupt safe.
1958 */
1959 void pmc_free_config(pmc_t pmc, pmc_config_t config) {
1960 assert(pmc);
1961 assert(config);
1962
1963 pmc_reference(pmc);
1964
1965 pmc_config_free(pmc, config);
1966
1967 pmc_deallocate(pmc);
1968 }
1969
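/*
 * Illustrative sketch of the create/free pairing above (my_pmc is a
 * hypothetical pmc_t, e.g. one returned by pmc_get_pmc_list() or
 * pmc_find_by_name()). A config that is never consumed by a successful
 * pmc_reserve*() call must be released with pmc_free_config():
 *
 *	pmc_config_t my_config = NULL;
 *
 *	if (KERN_SUCCESS == pmc_create_config(my_pmc, &my_config)) {
 *		pmc_free_config(my_pmc, my_config);
 *	}
 */
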
1970 /*
1971 * pmc_config_set_value sets up configuration area key-value pairs. These pairs
1972 * are to be either pre-known, or looked up via CoreProfile.framework.
1973 *
1974 * NOTE: This method is not interrupt safe.
1975 */
1976 kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config,
1977 uint8_t id, uint64_t value) {
1978
1979 kern_return_t ret = KERN_INVALID_ARGUMENT;
1980
1981 if(!pmc || !config) {
1982 return ret;
1983 }
1984
1985 pmc_reference(pmc);
1986
1987 ret = pmc->methods.config_set_value(config->object, id, value);
1988
1989 pmc_deallocate(pmc);
1990
1991 return ret;
1992 }
1993
1994 /*
1995 * pmc_config_set_interrupt_threshold modifies a config object, instructing
1996 * the pmc that it should generate a call to the given pmc_interrupt_method_t
1997 * after the counter counts @threshold events.
1998 *
1999 * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt
2000 * as the first argument when the interrupt handler is invoked, and the given
2001 * @refCon (which may be NULL) as the second.
2002 *
2003 * See pmc_interrupt_method_t.
2004 *
2005 * NOTE: This method is not interrupt safe.
2006 */
2007 kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config,
2008 uint64_t threshold, pmc_interrupt_method_t method, void *refCon) {
2009 kern_return_t ret = KERN_INVALID_ARGUMENT;
2010
2011 if(!config || !pmc) {
2012 return ret;
2013 }
2014
2015 assert(config);
2016 assert(pmc);
2017
2018 pmc_reference(pmc);
2019
2020 do {
2021 /*
2022 * We have a minor annoyance to side-step here. The driver layer expects
2023 * the config to never change once a reservation has been taken out with
2024 * it. However, in order to have the PMI method have the reservation as
2025 * the first argument (in order to allow the user-method to, for
2026 * example, write a 0 to it, and restart it), we need to create the
2027 * pmc_reservation_t before setting it up in the config object.
2028 * We overcome this by caching the method in the pmc_config_t stand-in,
2029 * and mutating the pmc_config_object_t just before returning a
2030 * reservation (in pmc_reserve() and friends, below).
2031 */
2032
2033 /* might as well stash this away too. */
2034 config->interrupt_after_value = threshold;
2035 config->method = method;
2036 config->refCon = refCon;
2037
2038 ret = KERN_SUCCESS;
2039
2040 }while(0);
2041
2042 pmc_deallocate(pmc);
2043
2044 return ret;
2045 }
2046
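/*
 * Illustrative sketch of configuring a PMI as described above. The event id,
 * the event value, the threshold and all my_-prefixed names are hypothetical;
 * real key/value pairs are either pre-known or come from CoreProfile.framework,
 * and my_pmi_handler is assumed to match pmc_interrupt_method_t:
 *
 *	pmc_config_t my_config = NULL;
 *
 *	if (KERN_SUCCESS == pmc_create_config(my_pmc, &my_config)) {
 *		(void)pmc_config_set_value(my_pmc, my_config, my_event_id, my_event_value);
 *		(void)pmc_config_set_interrupt_threshold(my_pmc, my_config,
 *			100000ULL, my_pmi_handler, NULL);
 *	}
 */
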
2047 /*
2048 * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number
2049 * of pmc_t's returned. Callers should free this list with a call to
2050 * pmc_free_pmc_list().
2051 *
2052 * NOTE: This method is not interrupt safe.
2053 */
2054 kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
2055 pmc_t *array = NULL;
2056 pmc_t pmc = NULL;
2057 size_t count = 0UL;
2058
2059 do {
2060 /* Copy down (to the stack) the count of perf counters */
2061 vm_size_t size = perf_counters_count;
2062
2063 /* Allocate that sized chunk */
2064 array = (pmc_t *)kalloc(sizeof(pmc_t) * size);
2065 if(!array) {
2066 return KERN_RESOURCE_SHORTAGE;
2067 }
2068
2069 /* Take the spin lock */
2070 lck_spin_lock(&perf_counters_queue_spin);
2071
2072 /* verify the size didn't change while we were allocating */
2073 if(size != perf_counters_count) {
2074 /*
2075 * queue size has changed between alloc and now - go back and
2076 * make another pass.
2077 */
2078
2079 /* drop the lock */
2080 lck_spin_unlock(&perf_counters_queue_spin);
2081
2082 /* free the block */
2083 kfree(array, sizeof(pmc_t) * size);
2084 array = NULL;
2085 }
2086
2087 /* if we get here, and array is NULL, we try again. */
2088 }while(!array);
2089
2090 /* copy the bits out */
2091 queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
2092 if(pmc) {
2093 /* copy out the pointer */
2094 array[count++] = pmc;
2095 }
2096 }
2097
2098 lck_spin_unlock(&perf_counters_queue_spin);
2099
2100 /* return the list and the size */
2101 *pmcs = array;
2102 *pmcCount = count;
2103
2104 return KERN_SUCCESS;
2105 }
2106
2107 /*
2108 * pmc_free_pmc_list frees an array of pmc_t that has been returned from
2109 * pmc_get_pmc_list.
2110 *
2111 * NOTE: This method is not interrupt safe.
2112 */
2113 void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) {
2114 if(pmcs && pmcCount) {
2115 COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount);
2116
2117 kfree(pmcs, pmcCount * sizeof(pmc_t));
2118 }
2119 }
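
/*
 * Illustrative sketch of walking the registered PMCs with the two calls above
 * (hypothetical usage; error handling reduced to the essentials):
 *
 *	pmc_t *my_pmcs = NULL;
 *	size_t my_count = 0UL, ii;
 *
 *	if (KERN_SUCCESS == pmc_get_pmc_list(&my_pmcs, &my_count)) {
 *		for (ii = 0UL; ii < my_count; ii++) {
 *			kprintf("pmc[%lu]: %s\n", ii, pmc_get_name(my_pmcs[ii]));
 *		}
 *		pmc_free_pmc_list(my_pmcs, my_count);
 *	}
 */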
2120
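/*
 * pmc_find_by_name returns (via @pmcs and @pmcCount) a newly allocated array of
 * the registered pmc_t's whose names begin with @name (a prefix match). Callers
 * free the returned array with pmc_free_pmc_list(). If nothing matches, *pmcs
 * is set to NULL and *pmcCount to 0.
 *
 * NOTE: This method is not interrupt safe.
 */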
2121 kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) {
2122 kern_return_t ret = KERN_INVALID_ARGUMENT;
2123
2124 if(!name || !pmcs || !pmcCount) {
2125 return ret;
2126 }
2127
2128 pmc_t *list = NULL;
2129 size_t count = 0UL;
2130
2131 if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) {
2132 size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL;
2133 size_t len = strlen(name);
2134
2135 for(ii = 0UL; ii < count; ii++) {
2136 const char *pmcName = pmc_get_name(list[ii]);
2137
2138 if(strlen(pmcName) < len) {
2139 /*
2140 * If the pmc name is shorter than the requested match, it's no
2141 * match, as we're looking for the most specific match(es).
2142 */
2143 continue;
2144 }
2145
2146 if(0 == strncmp(name, pmcName, len)) {
2147 pmc_t temp = list[ii];
2148
2149 // move matches to the head of the array.
2150 list[ii] = list[swapPtr];
2151 list[swapPtr] = temp;
2152 swapPtr++;
2153
2154 // keep a count of the matches
2155 matchCount++;
2156 }
2157 }
2158
2159 if(matchCount) {
2160 /*
2161 * If we have matches, they are all at the head of the array, so
2162 * just allocate enough space for @matchCount pmc_t's, and copy the
2163 * head of the array to the new allocation. Then free the old
2164 * allocation.
2165 */
2166
2167 pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount);
2168 if(result) {
2169 // copy the matches
2170 memcpy(result, list, sizeof(pmc_t) * matchCount);
2171
2172 ret = KERN_SUCCESS;
2173 }
2174
2175 pmc_free_pmc_list(list, count);
2176
2177 if(!result) {
2178 *pmcs = NULL;
2179 *pmcCount = 0UL;
2180 return KERN_RESOURCE_SHORTAGE;
2181 }
2182
2183 *pmcs = result;
2184 *pmcCount = matchCount;
2185 } else {
2186 *pmcs = NULL;
2187 *pmcCount = 0UL;
2188 }
2189 }
2190
2191 return ret;
2192 }
2193
2194 /*
2195 * pmc_get_name returns a pointer (not copied) to the human-readable name of the
2196 * given pmc.
2197 *
2198 * NOTE: Driver authors must take care not to allocate in the underlying get_name callback, as
2199 * this method *IS* interrupt safe.
2200 */
2201 const char *pmc_get_name(pmc_t pmc) {
2202 assert(pmc);
2203
2204 const char *name = pmc->methods.get_name(pmc->object);
2205
2206 return name;
2207 }
2208
2209 /*
2210 * pmc_get_accessible_core_list returns a pointer to an array of logical core
2211 * numbers (as well as the size of that array) that represent the logical cores
2212 * (hardware threads) from which the given @pmc can be accessed directly.
2213 *
2214 * NOTE: This method is interrupt safe.
2215 */
2216 kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores,
2217 size_t *logicalCoreCt) {
2218
2219 kern_return_t ret = KERN_INVALID_ARGUMENT;
2220
2221 if(!pmc || !logicalCores || !logicalCoreCt) {
2222 return ret;
2223 }
2224
2225 ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt);
2226
2227 return ret;
2228 }
2229
2230 /*
2231 * pmc_accessible_from_core will return TRUE if the given @pmc is directly
2232 * (e.g., hardware) readable from the given logical core.
2233 *
2234 * NOTE: This method is interrupt safe.
2235 */
2236 boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
2237 boolean_t ret = FALSE;
2238
2239 assert(pmc);
2240
2241 ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);
2242
2243 return ret;
2244 }
2245
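/*
 * Illustrative sketch combining the two accessors above to check whether the
 * current logical core can program a given pmc directly (my_pmc is
 * hypothetical):
 *
 *	uint32_t *my_cores = NULL;
 *	size_t my_core_count = 0UL;
 *
 *	if (KERN_SUCCESS == pmc_get_accessible_core_list(my_pmc, &my_cores, &my_core_count)) {
 *		boolean_t local = pmc_accessible_from_core(my_pmc, cpu_number());
 *
 *		kprintf("%s: accessible from this core: %d (%lu accessible cores total)\n",
 *			pmc_get_name(my_pmc), local, my_core_count);
 *	}
 */
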
2246 static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) {
2247 assert(resv);
2248 assert(resv->pmc);
2249 assert(config);
2250 assert(config->object);
2251
2252 /* If a PMI was requested (both a threshold and a handler were configured), set it up; otherwise there is nothing to do and we return success */
2253 if(config->interrupt_after_value && config->method) {
2254
2255 /* set the threshold */
2256 kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object,
2257 config->interrupt_after_value);
2258
2259 if(KERN_SUCCESS != ret) {
2260 /*
2261 * This is the most useful error message here, as this only happens
2262 * as a result of pmc_reserve*()
2263 */
2264 COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc);
2265 return FALSE;
2266 }
2267
2268 if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object,
2269 (void *)resv, &pmc_reservation_interrupt, config->refCon)) {
2270
2271 COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc);
2272 return FALSE;
2273 }
2274 }
2275
2276 return TRUE;
2277 }
2278
2279 /*
2280 * pmc_reserve will attempt to reserve the given @pmc, with a given
2281 * configuration object, for counting system-wide. This method will fail with
2282 * KERN_FAILURE if the given pmc is already reserved at any scope.
2283 *
2284 * This method consumes the given configuration object if it returns
2285 * KERN_SUCCESS. Any other return value indicates the caller
2286 * must free the config object via pmc_free_config().
2287 *
2288 * NOTE: This method is NOT interrupt safe.
2289 */
2290 kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config,
2291 pmc_reservation_t *reservation) {
2292
2293 if(!pmc || !config || !reservation) {
2294 return KERN_INVALID_ARGUMENT;
2295 }
2296
2297 pmc_reservation_t resv = reservation_alloc();
2298 if(!resv) {
2299 return KERN_RESOURCE_SHORTAGE;
2300 }
2301
2302 reservation_init(resv);
2303
2304 resv->flags |= PMC_FLAG_SCOPE_SYSTEM;
2305 resv->config = config;
2306
2307 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2308 resv->config = NULL;
reservation_free(resv);	/* release the reservation allocated above */
2309 return KERN_FAILURE;
2310 }
2311
2312 /* enqueue reservation in proper place */
2313 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2314 /* Prevent free of config object */
2315 resv->config = NULL;
2316
2317 reservation_free(resv);
2318 return KERN_FAILURE;
2319 }
2320
2321 /* The PMI handler (if one was requested) was installed by pmc_reservation_setup_pmi() above */
2322
2323 *reservation = resv;
2324
2325 return KERN_SUCCESS;
2326 }
2327
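/*
 * Illustrative sketch of a complete system-wide reservation lifecycle built on
 * the call above (my_-prefixed names are hypothetical and error handling is
 * abbreviated; the snippet is assumed to run in a non-interrupt, kext context):
 *
 *	pmc_config_t my_config = NULL;
 *	pmc_reservation_t my_resv = NULL;
 *	uint64_t my_count = 0ULL;
 *
 *	if (KERN_SUCCESS != pmc_create_config(my_pmc, &my_config)) {
 *		return;
 *	}
 *
 *	(void)pmc_config_set_value(my_pmc, my_config, my_event_id, my_event_value);
 *
 *	if (KERN_SUCCESS != pmc_reserve(my_pmc, my_config, &my_resv)) {
 *		pmc_free_config(my_pmc, my_config);	// config is only consumed on success
 *		return;
 *	}
 *
 *	(void)pmc_reservation_start(my_resv);
 *	// ... run the workload of interest ...
 *	(void)pmc_reservation_stop(my_resv);
 *	(void)pmc_reservation_read(my_resv, &my_count);
 *
 *	(void)pmc_reservation_free(my_resv);	// releases the reservation and the consumed config
 */
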
2328 /*
2329 * pmc_reserve_task will attempt to reserve the given @pmc with a given
2330 * configuration object, for counting when the given @task is running on any
2331 * logical core that can directly access the given @pmc. This method will fail
2332 * with KERN_FAILURE if the given pmc is already reserved at either system or
2333 * thread scope.
2334 *
2335 * This method consumes the given configuration object if it returns
2336 * KERN_SUCCESS. Any other return value indicates the caller
2337 * must free the config object via pmc_free_config().
2338 *
2339 * NOTE: You can reserve the same pmc for N different tasks concurrently.
2340 * NOTE: This method is NOT interrupt safe.
2341 */
2342 kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config,
2343 task_t task, pmc_reservation_t *reservation) {
2344
2345 if(!pmc || !config || !reservation || !task) {
2346 return KERN_INVALID_ARGUMENT;
2347 }
2348
2349 if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
2350 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
2351 return KERN_INVALID_ARGUMENT;
2352 }
2353
2354 pmc_reservation_t resv = reservation_alloc();
2355 if(!resv) {
2356 return KERN_RESOURCE_SHORTAGE;
2357 }
2358
2359 reservation_init(resv);
2360
2361 resv->flags |= PMC_FLAG_SCOPE_TASK;
2362 resv->task = task;
2363
2364 resv->config = config;
2365
2366 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2367 resv->config = NULL;
reservation_free(resv);	/* release the reservation allocated above */
2368 return KERN_FAILURE;
2369 }
2370
2371 /* enqueue reservation in proper place */
2372 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2373 /* Prevent free of config object */
2374 resv->config = NULL;
2375
2376 reservation_free(resv);
2377 return KERN_FAILURE;
2378 }
2379
2380 *reservation = resv;
2381
2382 return KERN_SUCCESS;
2383 }
2384
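/*
 * Illustrative sketch of a task-scoped reservation for the calling task
 * (my_config is a configured pmc_config_t as in the sketches above, and my_pmc
 * must belong to a perf monitor that supports context switching):
 *
 *	pmc_reservation_t my_resv = NULL;
 *
 *	if (KERN_SUCCESS == pmc_reserve_task(my_pmc, my_config, current_task(), &my_resv)) {
 *		(void)pmc_reservation_start(my_resv);
 *	}
 */
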
2385 /*
2386 * pmc_reserve_thread will attempt to reserve the given @pmc with a given
2387 * configuration object, for counting when the given @thread is running on any
2388 * logical core that can directly access the given @pmc. This method will fail
2389 * with KERN_FAILURE if the given pmc is already reserved at either system or
2390 * task scope.
2391 *
2392 * This method consumes the given configuration object if it returns
2393 * KERN_SUCCESS. Any other return value indicates the caller
2394 * must free the config object via pmc_free_config().
2395 *
2396 * NOTE: You can reserve the same pmc for N different threads concurrently.
2397 * NOTE: This method is NOT interrupt safe.
2398 */
2399 kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config,
2400 thread_t thread, pmc_reservation_t *reservation) {
2401 if(!pmc || !config || !reservation || !thread) {
2402 return KERN_INVALID_ARGUMENT;
2403 }
2404
2405 if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
2406 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
2407 return KERN_INVALID_ARGUMENT;
2408 }
2409
2410 pmc_reservation_t resv = reservation_alloc();
2411 if(!resv) {
2412 return KERN_RESOURCE_SHORTAGE;
2413 }
2414
2415 reservation_init(resv);
2416
2417 resv->flags |= PMC_FLAG_SCOPE_THREAD;
2418 resv->thread = thread;
2419
2420 resv->config = config;
2421
2422 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
2423 resv->config = NULL;
reservation_free(resv);	/* release the reservation allocated above */
2424 return KERN_FAILURE;
2425 }
2426
2427 /* enqueue reservation in proper place */
2428 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
2429 /* Prevent free of config object */
2430 resv->config = NULL;
2431
2432 reservation_free(resv);
2433 return KERN_FAILURE;
2434 }
2435
2436 *reservation = resv;
2437
2438 return KERN_SUCCESS;
2439 }
2440
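/*
 * Illustrative sketch of the thread-scoped variant, reserving for the calling
 * thread (same assumptions as the task-scoped sketch above):
 *
 *	pmc_reservation_t my_resv = NULL;
 *
 *	if (KERN_SUCCESS == pmc_reserve_thread(my_pmc, my_config, current_thread(), &my_resv)) {
 *		(void)pmc_reservation_start(my_resv);
 *	}
 */
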
2441 /*
2442 * pmc_reservation_start instructs the given reservation to start counting as
2443 * soon as possible.
2444 *
2445 * NOTE: This method is interrupt safe.
2446 */
2447 kern_return_t pmc_reservation_start(pmc_reservation_t reservation) {
2448 pmc_state_t newState;
2449
2450 if(!reservation) {
2451 return KERN_INVALID_ARGUMENT;
2452 }
2453
2454 /* Move the state machine */
2455 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) {
2456 return KERN_FAILURE;
2457 }
2458
2459 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
2460 * broadcast right before it leaves
2461 */
2462 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) {
2463 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2464 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu
2465 * on every cpu that can access the PMC.
2466 */
2467 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
2468 }
2469
2470 return KERN_SUCCESS;
2471 }
2472
2473 /*
2474 * pmc_reservation_stop instructs the given reservation to stop counting as
2475 * soon as possible. When this method returns, the pmc will be marked as stopping
2476 * and subsequent calls to pmc_reservation_start will succeed. This does not mean
2477 * that the pmc hardware has _actually_ stopped running. Assuming no other changes
2478 * to the reservation state, the pmc hardware _will_ stop shortly.
2479 *
2480 */
2481 kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) {
2482 pmc_state_t newState;
2483
2484 if(!reservation) {
2485 return KERN_INVALID_ARGUMENT;
2486 }
2487
2488 /* Move the state machine */
2489 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) {
2490 return KERN_FAILURE;
2491 }
2492
2493 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
2494 * broadcast right before it leaves. Similarly, if we just moved directly to STOP, don't bother broadcasting.
2495 */
2496 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) {
2497 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2498 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
2499 * on every cpu that can access the PMC.
2500 */
2501
2502 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
2503 }
2504
2505 return KERN_SUCCESS;
2506 }
2507
2508 /*
2509 * pmc_reservation_read will read the event count associated with a reservation.
2510 * If the caller is currently executing in a context that both a) matches the
2511 * reservation's context, and b) can access the reservation's pmc directly, the
2512 * value will be read from hardware. Otherwise, this returns the reservation's
2513 * stored value.
2514 *
2515 * NOTE: This method is interrupt safe.
2516 * NOTE: When not on the interrupt stack, this method may block.
2517 */
2518 kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) {
2519 kern_return_t ret = KERN_FAILURE;
2520 uint64_t timeout;
2521 uint32_t spins;
2522
2523 if(!reservation || !value) {
2524 return KERN_INVALID_ARGUMENT;
2525 }
2526
2527 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
2528 timeout += mach_absolute_time();
2529 spins = 0;
2530 do {
2531 uint32_t state = reservation->state;
2532
2533 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
2534 /* Attempt read from hardware via drivers. */
2535
2536 assert(reservation->pmc);
2537
2538 ret = reservation->pmc->methods.get_count(reservation->pmc->object, value);
2539
2540 break;
2541 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
2542 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
2543 /* Spin */
2544 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
2545 if (++spins > PMC_SPIN_THRESHOLD) {
2546 if (mach_absolute_time() > timeout) {
2547 pmc_spin_timeout_count++;
2548 assert(0);
2549 }
2550 }
2551
2552 cpu_pause();
2553 } else {
2554 break;
2555 }
2556 } while (1);
2557
2558 /* If the direct hardware read failed (for whatever reason) */
2559 if(KERN_SUCCESS != ret) {
2560 /* Read stored value */
2561 *value = reservation->value;
2562 }
2563
2564 return KERN_SUCCESS;
2565 }
2566
2567 /*
2568 * pmc_reservation_write will write the event count associated with a reservation.
2569 * If the caller is currently executing in a context that both a) matches the
2570 * reservation's context, and b) can access the reservation's pmc directly, the
2571 * value will be written to hardware. Otherwise, this writes the reservation's
2572 * stored value.
2573 *
2574 * NOTE: This method is interrupt safe.
2575 * NOTE: When not on the interrupt stack, this method may block.
2576 */
2577 kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) {
2578 kern_return_t ret = KERN_FAILURE;
2579 uint64_t timeout;
2580 uint32_t spins;
2581
2582 if(!reservation) {
2583 return KERN_INVALID_ARGUMENT;
2584 }
2585
2586 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
2587 timeout += mach_absolute_time();
2588 spins = 0;
2589 do {
2590 uint32_t state = reservation->state;
2591
2592 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
2593 /* Write to hardware via drivers. */
2594 assert(reservation->pmc);
2595
2596 ret = reservation->pmc->methods.set_count(reservation->pmc->object, value);
2597 break;
2598 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
2599 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
2600 /* Spin */
2601 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
2602 if (++spins > PMC_SPIN_THRESHOLD) {
2603 if (mach_absolute_time() > timeout) {
2604 pmc_spin_timeout_count++;
2605 assert(0);
2606 }
2607 }
2608
2609 cpu_pause();
2610 } else {
2611 break;
2612 }
2613 } while (1);
2614
2615 if(KERN_SUCCESS != ret) {
2616 /* Write stored value */
2617 reservation->value = value;
2618 }
2619
2620 return KERN_SUCCESS;
2621 }
2622
2623 /*
2624 * pmc_reservation_free releases a reservation and all associated resources.
2625 *
2626 * NOTE: This method is NOT interrupt safe.
2627 */
2628 kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
2629 pmc_state_t newState;
2630
2631 if(!reservation) {
2632 return KERN_INVALID_ARGUMENT;
2633 }
2634
2635 /* Move the state machine */
2636 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) {
2637 return KERN_FAILURE;
2638 }
2639
2640 /* If we didn't move directly to DEALLOC, help things along */
2641 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) {
2642 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching
2643 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
2644 * on every cpu that can access the PMC.
2645 */
2646 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
2647 }
2648
2649 /* Block until the reservation hits the <DEALLOC, 0, > state */
2650 while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) {
2651 assert_wait((event_t)reservation, THREAD_UNINT);
2652 thread_block(THREAD_CONTINUE_NULL);
2653 }
2654
2655 /* remove from queues */
2656 pmc_internal_reservation_remove(reservation);
2657
2658 /* free reservation */
2659 reservation_free(reservation);
2660
2661 return KERN_SUCCESS;
2662 }
2663
2664 /*
2665 * pmc_context_switch performs all context switching necessary to save all pmc
2666 * state associated with @oldThread (and the task to which @oldThread belongs),
2667 * as well as to restore all pmc state associated with @newThread (and the task
2668 * to which @newThread belongs).
2669 *
2670 * NOTE: This method IS interrupt safe.
2671 */
2672 boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) {
2673 pmc_reservation_t resv = NULL;
2674 uint32_t cpuNum = cpu_number();
2675
2676 /* Outgoing thread: save pmc state */
2677 lck_spin_lock(&reservations_spin);
2678
2679 /* iterate over any reservations */
2680 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
2681 if(resv && oldThread == resv->thread) {
2682
2683 /* check if we can read the associated pmc from this core. */
2684 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2685 /* save the state. At this point, if it fails, it fails. */
2686 (void)pmc_internal_reservation_context_out(resv);
2687 }
2688 }
2689 }
2690
2691 queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
2692 if(resv && resv->task == oldThread->task) {
2693 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2694 (void)pmc_internal_reservation_context_out(resv);
2695 }
2696 }
2697 }
2698
2699 /* Incoming thread and its task: restore pmc state */
2700
2701 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
2702 if(resv && resv->thread == newThread) {
2703 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2704 (void)pmc_internal_reservation_context_in(resv);
2705 }
2706 }
2707 }
2708
2709
2710 queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
2711 if(resv && resv->task == newThread->task) {
2712 if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
2713 (void)pmc_internal_reservation_context_in(resv);
2714 }
2715 }
2716 }
2717
2718 lck_spin_unlock(&reservations_spin);
2719
2720 return TRUE;
2721 }
2722
2723 #else /* !CONFIG_COUNTERS */
2724
2725 #if 0
2726 #pragma mark -
2727 #pragma mark Dummy functions
2728 #endif
2729
2730 /*
2731 * In the case that someone has chosen not to include the PMC KPI in some
2732 * configuration, we still have exports for kexts, so we'll need to define stub
2733 * methods that return failures.
2734 */
2735 kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused,
2736 perf_monitor_methods_t *methods __unused) {
2737 return KERN_FAILURE;
2738 }
2739
2740 kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) {
2741 return KERN_FAILURE;
2742 }
2743
2744 kern_return_t pmc_register(perf_monitor_object_t monitor __unused,
2745 pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) {
2746 return KERN_FAILURE;
2747 }
2748
2749 kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused,
2750 pmc_object_t pmc __unused) {
2751 return KERN_FAILURE;
2752 }
2753
2754 kern_return_t pmc_create_config(pmc_t pmc __unused,
2755 pmc_config_t *config __unused) {
2756 return KERN_FAILURE;
2757 }
2758
2759 void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) {
2760 }
2761
2762 kern_return_t pmc_config_set_value(pmc_t pmc __unused,
2763 pmc_config_t config __unused, uint8_t id __unused,
2764 uint64_t value __unused) {
2765 return KERN_FAILURE;
2766 }
2767
2768 kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused,
2769 pmc_config_t config __unused, uint64_t threshold __unused,
2770 pmc_interrupt_method_t method __unused, void *refCon __unused) {
2771 return KERN_FAILURE;
2772 }
2773
2774 kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) {
2775 return KERN_FAILURE;
2776 }
2777
2778 void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) {
2779 }
2780
2781 kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused,
2782 size_t *pmcCount __unused) {
2783 return KERN_FAILURE;
2784 }
2785
2786 const char *pmc_get_name(pmc_t pmc __unused) {
2787 return "";
2788 }
2789
2790 kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused,
2791 uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) {
2792 return KERN_FAILURE;
2793 }
2794
2795 boolean_t pmc_accessible_from_core(pmc_t pmc __unused,
2796 uint32_t logicalCore __unused) {
2797 return FALSE;
2798 }
2799
2800 kern_return_t pmc_reserve(pmc_t pmc __unused,
2801 pmc_config_t config __unused, pmc_reservation_t *reservation __unused) {
2802 return KERN_FAILURE;
2803 }
2804
2805 kern_return_t pmc_reserve_task(pmc_t pmc __unused,
2806 pmc_config_t config __unused, task_t task __unused,
2807 pmc_reservation_t *reservation __unused) {
2808 return KERN_FAILURE;
2809 }
2810
2811 kern_return_t pmc_reserve_thread(pmc_t pmc __unused,
2812 pmc_config_t config __unused, thread_t thread __unused,
2813 pmc_reservation_t *reservation __unused) {
2814 return KERN_FAILURE;
2815 }
2816
2817 kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) {
2818 return KERN_FAILURE;
2819 }
2820
2821 kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) {
2822 return KERN_FAILURE;
2823 }
2824
2825 kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused,
2826 uint64_t *value __unused) {
2827 return KERN_FAILURE;
2828 }
2829
2830 kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused,
2831 uint64_t value __unused) {
2832 return KERN_FAILURE;
2833 }
2834
2835 kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) {
2836 return KERN_FAILURE;
2837 }
2838
2839
2840 #endif /* !CONFIG_COUNTERS */