diff --git a/osfmk/pmc/pmc.c b/osfmk/pmc/pmc.c
new file mode 100644 (file)
index 0000000..43da760
--- /dev/null
+++ b/osfmk/pmc/pmc.c
@@ -0,0 +1,2855 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <kern/kalloc.h>
+#include <kern/kern_types.h>
+#include <kern/locks.h>
+#include <kern/misc_protos.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/zalloc.h>
+#include <machine/machine_cpu.h>
+
+#include <pmc/pmc.h>
+
+#include <libkern/OSAtomic.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <i386/mp.h>
+#endif
+
+#if defined(__ppc__)
+#include <ppc/cpu_internal.h>
+#include <ppc/machine_cpu.h>
+#endif
+
+#if CONFIG_COUNTERS
+
+/* Define DEBUG_COUNTERS to enable various debug logging */
+#undef DEBUG_COUNTERS
+
+typedef uint8_t pmc_state_event_t;
+
+#define PMC_STATE_EVENT_START                          0
+#define PMC_STATE_EVENT_STOP                           1
+#define PMC_STATE_EVENT_FREE                           2
+#define PMC_STATE_EVENT_INTERRUPT                      3
+#define PMC_STATE_EVENT_END_OF_INTERRUPT       4
+#define PMC_STATE_EVENT_CONTEXT_IN                     5
+#define PMC_STATE_EVENT_CONTEXT_OUT                    6
+#define PMC_STATE_EVENT_LOAD_FINISHED          7
+#define PMC_STATE_EVENT_STORE_FINISHED         8
+
+/* PMC spin timeouts */
+#define PMC_SPIN_THRESHOLD     10      /* Number of spins to allow before checking mach_absolute_time() */
+#define PMC_SPIN_TIMEOUT_US    10      /* Time in microseconds before the spin causes an assert */
+
+uint64_t pmc_spin_timeout_count = 0;   /* Number of times a PMC spin loop has timed out */
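+
+/*
+ * The constants above imply the following spin pattern: poll cheaply for
+ * PMC_SPIN_THRESHOLD iterations, then start checking mach_absolute_time()
+ * against a PMC_SPIN_TIMEOUT_US deadline, counting (and asserting on) a
+ * timeout.  A minimal sketch, with a hypothetical wait condition:
+ *
+ *     uint32_t spins = 0;
+ *     uint64_t deadline;
+ *     nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000ULL, &deadline);
+ *     deadline += mach_absolute_time();
+ *     while (!condition_we_are_waiting_for()) {
+ *             if (++spins > PMC_SPIN_THRESHOLD && mach_absolute_time() > deadline) {
+ *                     pmc_spin_timeout_count++;
+ *                     assert(0);
+ *                     break;
+ *             }
+ *     }
+ */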
+
+#ifdef DEBUG_COUNTERS
+#      include <pexpert/pexpert.h>
+#      define COUNTER_DEBUG(...) \
+       do { \
+               kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
+               kprintf(__VA_ARGS__); \
+       } while(0)
+
+#      define PRINT_PERF_MON(x)        \
+       do { \
+               kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
+                       x, x->object, x->useCount, \
+                       x->methods.supports_context_switching ? \
+                       x->methods.supports_context_switching(x->object) : 0); \
+       } while(0)
+
+static const char * pmc_state_state_name(pmc_state_t state) {
+       switch (PMC_STATE_STATE(state)) {
+               case PMC_STATE_STATE_INVALID:
+                       return "INVALID";
+               case PMC_STATE_STATE_STOP:
+                       return "STOP";
+               case PMC_STATE_STATE_CAN_RUN:
+                       return "CAN_RUN";
+               case PMC_STATE_STATE_LOAD:
+                       return "LOAD";
+               case PMC_STATE_STATE_RUN:
+                       return "RUN";
+               case PMC_STATE_STATE_STORE:
+                       return "STORE";
+               case PMC_STATE_STATE_INTERRUPT:
+                       return "INTERRUPT";
+               case PMC_STATE_STATE_DEALLOC:
+                       return "DEALLOC";
+               default:
+                       return "UNKNOWN";
+       }
+}
+
+static const char * pmc_state_event_name(pmc_state_event_t event) {
+       switch (event) {
+               case PMC_STATE_EVENT_START:
+                       return "START";
+               case PMC_STATE_EVENT_STOP:
+                       return "STOP";
+               case PMC_STATE_EVENT_FREE:
+                       return "FREE";
+               case PMC_STATE_EVENT_INTERRUPT:
+                       return "INTERRUPT";
+               case PMC_STATE_EVENT_END_OF_INTERRUPT:
+                       return "END OF INTERRUPT";
+               case PMC_STATE_EVENT_CONTEXT_IN:
+                       return "CONTEXT IN";
+               case PMC_STATE_EVENT_CONTEXT_OUT:
+                       return "CONTEXT OUT";
+               case PMC_STATE_EVENT_LOAD_FINISHED:
+                       return "LOAD_FINISHED";
+               case PMC_STATE_EVENT_STORE_FINISHED:
+                       return "STORE_FINISHED";
+               default:
+                       return "UNKNOWN";
+       }
+}
+
+#      define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
+#      define PMC_STATE_ARGS(x)        pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
+                                       ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
+#else
+#      define COUNTER_DEBUG(...)
+#      define PRINT_PERF_MON(x)
+#      define PMC_STATE_FORMAT
+#      define PMC_STATE_ARGS(x)
+#endif
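+
+/*
+ * Typical debug usage (illustrative only; these macros expand to nothing
+ * unless DEBUG_COUNTERS is defined) combines COUNTER_DEBUG with the state
+ * formatting macros:
+ *
+ *     COUNTER_DEBUG("reservation %p state: " PMC_STATE_FORMAT "\n",
+ *             resv, PMC_STATE_ARGS(resv->state));
+ */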
+
+/*!struct
+ * pmc_config is the data behind a pmc_config_t.
+ * @member object A pointer to an instance of IOPerformanceCounterConfiguration
+ * @member method A pointer to a method to call to handle PMI.
+ * @member interrupt_after_value Cause a PMI after the counter counts this many
+ * events.
+ * @member refCon Passed to the @method method as the refCon argument.
+ */
+struct pmc_config {
+       pmc_config_object_t object;
+       volatile pmc_interrupt_method_t method;
+       uint64_t interrupt_after_value;
+       void *refCon;
+};
+
+/*
+ * Allocation Zones
+ * 
+ * Two allocation zones - Perf zone small and Perf zone big.
+ * Each zone has associated maximums, defined below.
+ * The small zone is the max of the smallest allocation objects (all sizes on
+ * K64):
+ *     perf_monitor_t - 48 bytes
+ *             perf_monitor_methods_t - 28 bytes
+ *     pmc_reservation_t - 48 bytes
+ *  pmc_config_t - 32 bytes
+ * perf_small_zone unit size is (on K64) 48 bytes
+ * perf_small_zone max count must be max number of perf monitors, plus (max
+ * number of reservations * 2). The "*2" is because each reservation has a
+ * pmc_config_t within.
+ *
+ * Big zone is max of the larger allocation units
+ *     pmc_t - 144 bytes
+ *             pmc_methods_t - 116 bytes
+ * perf_big_zone unit size is (on K64) 144 bytes
+ * perf_big_zone max count is the max number of PMCs we support.
+ */
+
+static zone_t perf_small_zone = NULL;
+#define MAX_PERF_SMALLS                (256 + 8196 + 8196)
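+/* presumably 256 perf monitors + 8196 reservations + 8196 configs, per the sizing note above */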
+#define PERF_SMALL_UNIT_SZ     (MAX(MAX(sizeof(struct perf_monitor), \
+       sizeof(struct pmc_reservation)), sizeof(struct pmc_config))) 
+
+static zone_t perf_big_zone = NULL;
+#define MAX_PERF_BIGS          (1024)
+#define PERF_BIG_UNIT_SZ       (sizeof(struct pmc))
+
+/*
+ * Locks and Lock groups
+ */
+static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL;
+static lck_grp_attr_t *pmc_lock_grp_attr;
+static lck_attr_t *pmc_lock_attr;
+
+/* PMC tracking queue locks */
+static lck_spin_t perf_monitor_queue_spin;             /* protects adding and removing from queue */
+static lck_spin_t perf_counters_queue_spin;            /* protects adding and removing from queue */
+
+/* Reservation tracking queues lock */
+static lck_spin_t reservations_spin;
+
+/*
+ * Tracking queues
+ *
+ * Keeps track of registered perf monitors and perf counters
+ */
+static queue_t perf_monitors_queue = NULL;
+static volatile uint32_t perf_monitors_count = 0U;
+
+static queue_t perf_counters_queue = NULL;
+static volatile uint32_t perf_counters_count = 0U;
+
+/* 
+ * Reservation queues
+ *
+ * Keeps track of all system, task, and thread-level reservations (both active and
+ * inactive).
+ *
+ * We track them all here (rather than in their respective task or thread only)
+ * so that we can inspect our tracking data directly (rather than peeking at
+ * every task and thread) to determine if/when a new reservation would
+ * constitute a conflict.
+ */
+static queue_t system_reservations = NULL;
+static volatile uint32_t system_reservation_count __attribute__((aligned(4))) = 0U;
+
+static queue_t task_reservations = NULL;
+static volatile uint32_t task_reservation_count __attribute__((aligned(4))) = 0U;
+
+static queue_t thread_reservations = NULL;
+static volatile uint32_t thread_reservation_count __attribute__((aligned(4))) = 0U;
+
+
+#if XNU_KERNEL_PRIVATE
+
+/*
+ * init_pmc_locks creates and initializes all the locks and lock groups and lock
+ * attributes required for the pmc sub-system.
+ */
+static void init_pmc_locks(void) {
+       pmc_lock_attr = lck_attr_alloc_init();
+       assert(pmc_lock_attr);
+
+       pmc_lock_grp_attr = lck_grp_attr_alloc_init();
+       assert(pmc_lock_grp_attr);
+
+       pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
+       assert(pmc_lock_grp);
+
+       lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
+       lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);
+
+       lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);
+}
+
+/*
+ * init_pmc_zones initializes the allocation zones used by the pmc subsystem
+ */
+static void init_pmc_zones(void) {
+       perf_small_zone = zinit(PERF_SMALL_UNIT_SZ, 
+               MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS, 
+               "pmc.small zone");
+
+       assert(perf_small_zone);
+
+       perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
+               MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS, 
+               "pmc.big zone");
+
+       assert(perf_big_zone);
+}
+
+/*
+ * init_pmc_queues allocates and initializes the tracking queues for
+ * registering and reserving individual pmcs and perf monitors.
+ */
+static void init_pmc_queues(void) {
+       perf_monitors_queue = (queue_t)kalloc(sizeof(queue_t));
+       assert(perf_monitors_queue);
+
+       queue_init(perf_monitors_queue);
+
+       perf_counters_queue = (queue_t)kalloc(sizeof(queue_t));
+       assert(perf_counters_queue);
+
+       queue_init(perf_counters_queue);
+
+       system_reservations = (queue_t)kalloc(sizeof(queue_t));
+       assert(system_reservations);
+
+       queue_init(system_reservations);
+
+       task_reservations = (queue_t)kalloc(sizeof(queue_t));
+       assert(task_reservations);
+
+       queue_init(task_reservations);
+
+       thread_reservations = (queue_t)kalloc(sizeof(queue_t));
+       assert(thread_reservations);
+
+       queue_init(thread_reservations);
+}
+
+/*
+ * pmc_bootstrap brings up all the necessary infrastructure required to use the
+ * pmc sub-system.
+ */
+__private_extern__
+void pmc_bootstrap(void) {
+       /* build our alloc zones */
+       init_pmc_zones();
+
+       /* build the locks */
+       init_pmc_locks();
+
+       /* build our tracking queues */
+       init_pmc_queues();
+}
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+/*
+ * Perf Monitor Internals
+ */
+
+static perf_monitor_t perf_monitor_alloc(void) {
+       /* perf monitors come from the perf small zone */
+       return (perf_monitor_t)zalloc(perf_small_zone);
+}
+
+static void perf_monitor_free(void *pm) {
+       zfree(perf_small_zone, pm);
+}
+
+static void perf_monitor_init(perf_monitor_t pm) {
+       assert(pm);
+
+       pm->object = NULL;
+
+       bzero(&(pm->methods), sizeof(perf_monitor_methods_t));
+
+       pm->useCount = 1;       /* initial retain count of 1, for caller */
+
+       pm->link.next = pm->link.prev = (queue_entry_t)NULL;
+}
+
+/*
+ * perf_monitor_dequeue removes the given perf_monitor_t from the
+ * perf_monitor_queue, thereby unregistering it with the system.
+ */
+static void perf_monitor_dequeue(perf_monitor_t pm) {
+       lck_spin_lock(&perf_monitor_queue_spin);
+       
+       /* 
+        * remove the @pm object from the @perf_monitors_queue queue (it is of type
+        * <perf_monitor_t> and has a field called @link that is the queue_link_t).
+        */
+       queue_remove(perf_monitors_queue, pm, perf_monitor_t, link);
+
+       perf_monitors_count--;
+
+       lck_spin_unlock(&perf_monitor_queue_spin);
+}
+
+/*
+ * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue,
+ * thereby registering it for use with the system.
+ */
+static void perf_monitor_enqueue(perf_monitor_t pm) {
+       lck_spin_lock(&perf_monitor_queue_spin);
+
+       queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
+
+       perf_monitors_count++;
+
+       lck_spin_unlock(&perf_monitor_queue_spin);
+}
+
+/*
+ * perf_monitor_reference increments the reference count for the given
+ * perf_monitor_t.
+ */
+static void perf_monitor_reference(perf_monitor_t pm) {
+       assert(pm);
+
+       OSIncrementAtomic(&(pm->useCount));
+}
+
+/*
+ * perf_monitor_deallocate decrements the reference count for the given
+ * perf_monitor_t.  If the reference count hits 0, the object is released back
+ * to the perf_small_zone via a call to perf_monitor_free().
+ */
+static void perf_monitor_deallocate(perf_monitor_t pm) {
+       assert(pm);
+
+       /* If we just removed the last reference count */
+       if(1 == OSDecrementAtomic(&(pm->useCount))) {
+               /* Free the object */
+               perf_monitor_free(pm);
+       }
+}
+
+/*
+ * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the
+ * given C++ object pointer that was used when registering with the subsystem.
+ *
+ * If found, the method returns the perf_monitor_t with an extra reference
+ * placed on the object (or NULL if not found).
+ *
+ * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after
+ * calling perf_monitor_find.
+ */
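+/*
+ * A minimal usage sketch (caller code; variable names hypothetical):
+ *
+ *     perf_monitor_t pm = perf_monitor_find(monitor_object);
+ *     if (pm) {
+ *             ... use pm ...
+ *             perf_monitor_deallocate(pm);    (drop the extra reference)
+ *     }
+ */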
+static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
+       assert(monitor);
+       perf_monitor_t element = NULL;
+       perf_monitor_t found = NULL;
+
+       lck_spin_lock(&perf_monitor_queue_spin);
+       
+       queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
+               if(element && element->object == monitor) {
+                       /* We found it - reference the object. */
+                       perf_monitor_reference(element);
+                       found = element;
+                       break;
+               }
+       }
+
+       lck_spin_unlock(&perf_monitor_queue_spin);
+
+       return found;
+}
+
+/*
+ * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
+ * associated with.
+ */
+static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
+       assert(pm);
+       assert(pmc);
+
+       /* Today, we merely add a reference count now that a new pmc is attached */
+       perf_monitor_reference(pm);
+}
+
+/*
+ * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf
+ * monitor it is associated with.
+ */
+static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
+       assert(pm);
+       assert(pmc);
+
+       /* Today, we merely remove a reference count now that the pmc is detached */
+       perf_monitor_deallocate(pm);
+}
+
+/*
+ * Perf Counter internals
+ */
+
+static pmc_t pmc_alloc(void) {
+       return (pmc_t)zalloc(perf_big_zone);
+}
+
+static void pmc_free(void *pmc) {
+       zfree(perf_big_zone, pmc);
+}
+
+/*
+ * pmc_init initializes a newly allocated pmc_t
+ */
+static void pmc_init(pmc_t pmc) {
+       assert(pmc);
+
+       pmc->object = NULL;
+       pmc->monitor = NULL;
+
+       bzero(&pmc->methods, sizeof(pmc_methods_t));
+
+       /* One reference for the caller */
+       pmc->useCount = 1;
+}
+
+/*
+ * pmc_reference increments the reference count of the given pmc_t
+ */
+static void pmc_reference(pmc_t pmc) {
+       assert(pmc);
+
+       OSIncrementAtomic(&(pmc->useCount));
+}
+
+/*
+ * pmc_deallocate decrements the reference count of the given pmc_t. If the
+ * reference count hits zero, the given pmc_t is deallocated and released back
+ * to the allocation zone.
+ */
+static void pmc_deallocate(pmc_t pmc) {
+       assert(pmc);
+
+       /* If we just removed the last reference count */
+       if(1 == OSDecrementAtomic(&(pmc->useCount))) {
+               /* Free the pmc */
+               pmc_free(pmc);
+       }
+}
+
+/*
+ * pmc_dequeue removes the given, newly *un*registered pmc from the
+ * perf_counters_queue.
+ */
+static void pmc_dequeue(pmc_t pmc) {
+       lck_spin_lock(&perf_counters_queue_spin);
+
+       queue_remove(perf_counters_queue, pmc, pmc_t, link);
+
+       perf_counters_count--;
+
+       lck_spin_unlock(&perf_counters_queue_spin);
+}
+
+/*
+ * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue
+ */
+static void pmc_enqueue(pmc_t pmc) {
+       lck_spin_lock(&perf_counters_queue_spin);
+
+       queue_enter(perf_counters_queue, pmc, pmc_t, link);
+
+       perf_counters_count++;
+
+       lck_spin_unlock(&perf_counters_queue_spin);
+}
+
+/*
+ * pmc_find attempts to locate a pmc_t that was registered with the given
+ * pmc_object_t pointer.  If found, it returns the pmc_t with an extra reference
+ * which must be dropped by the caller by calling pmc_deallocate().
+ */
+static pmc_t pmc_find(pmc_object_t object) {
+       assert(object);
+
+       lck_spin_lock(&perf_counters_queue_spin);
+       
+       pmc_t element = NULL;
+       pmc_t found = NULL;
+
+       queue_iterate(perf_counters_queue, element, pmc_t, link) {
+               if(element && element->object == object) {
+                       pmc_reference(element);
+
+                       found = element;
+                       break;
+               }
+       }
+
+       lck_spin_unlock(&perf_counters_queue_spin);
+
+       return found;
+}
+
+/*
+ * Config internals
+ */
+
+/* Allocate a pmc_config_t */
+static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) {
+       return (pmc_config_t)zalloc(perf_small_zone);
+}
+
+/* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */
+static void pmc_config_free(pmc_t pmc, pmc_config_t config) {
+       assert(pmc);
+       assert(config);
+
+       if(config->object) {
+               pmc->methods.free_config(pmc->object, config->object);
+               config->object = NULL;
+       }
+
+       zfree(perf_small_zone, config);
+}
+
+static kern_return_t pmc_open(pmc_t pmc) {
+       assert(pmc);
+       assert(pmc->object);
+       assert(pmc->open_object);
+
+       return pmc->methods.open(pmc->object, pmc->open_object);
+}
+
+static kern_return_t pmc_close(pmc_t pmc) {
+       assert(pmc);
+       assert(pmc->object);
+       assert(pmc->open_object);
+
+       return pmc->methods.close(pmc->object, pmc->open_object);
+}
+
+/*
+ * Reservation Internals
+ */
+
+static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc);
+static void pmc_internal_reservation_store(pmc_reservation_t reservation);
+static void pmc_internal_reservation_load(pmc_reservation_t reservation);
+
+static pmc_reservation_t reservation_alloc(void) {
+       /* pmc reservations come from the perf small zone */
+       return (pmc_reservation_t)zalloc(perf_small_zone);
+}
+
+/*
+ * reservation_free deallocates and releases all resources associated with the
+ * given pmc_reservation_t.  This includes freeing the config used to create the
+ * reservation, decrementing the reference count for the pmc used to create the
+ * reservation, and deallocating the reservation's memory.
+ */
+static void reservation_free(pmc_reservation_t resv) {
+       /* Free config */
+       if(resv->config) {
+               assert(resv->pmc);
+
+               pmc_free_config(resv->pmc, resv->config);
+
+               resv->config = NULL;
+       }
+
+       /* release PMC */
+       (void)pmc_internal_reservation_set_pmc(resv, NULL);
+
+       /* Free reservation */
+       zfree(perf_small_zone, resv);
+}
+
+/*
+ * reservation_init initializes a newly created reservation.
+ */
+static void reservation_init(pmc_reservation_t resv) {
+       assert(resv);
+
+       resv->pmc = NULL;
+       resv->config = NULL;
+       resv->value = 0ULL;
+
+       resv->flags = 0U;
+       resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
+       resv->active_last_context_in = 0U;
+
+       /*
+        * Since this member is a union, we only need to set either the task 
+        * or thread to NULL.
+        */
+       resv->task = TASK_NULL;
+}
+
+/*
+ * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If
+ * there was one set already, it is deallocated (reference is dropped) before
+ * the new one is set.  This methods increases the reference count of the given
+ * pmc_t.
+ *
+ * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of
+ * dropping the reference on any previously set pmc, and setting the reservation
+ * to having no pmc set.
+ */
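+/*
+ * Minimal call-pattern sketch (error handling elided): attach a pmc to a
+ * reservation, then later detach it by passing NULL, which closes the pmc
+ * and drops the reservation's reference on it.
+ *
+ *     if (KERN_SUCCESS == pmc_internal_reservation_set_pmc(resv, pmc)) {
+ *             ... resv now holds a reference on pmc, and the pmc is open ...
+ *     }
+ *     ...
+ *     (void)pmc_internal_reservation_set_pmc(resv, NULL);
+ */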
+static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) {
+       assert(resv);
+
+       if(resv->pmc) {
+               (void)pmc_close(resv->pmc);
+               pmc_deallocate(resv->pmc);
+               resv->pmc = NULL;
+       }
+
+       resv->pmc = pmc;
+
+       if(resv->pmc) {
+               pmc_reference(resv->pmc);
+               if(KERN_SUCCESS != pmc_open(resv->pmc)) {
+                       pmc_deallocate(resv->pmc);
+                       resv->pmc = NULL;
+
+                       return KERN_FAILURE;
+               }
+       }
+
+       return KERN_SUCCESS;
+}
+
+/* 
+ * Used to place reservation into one of the system, task, and thread queues
+ * Assumes the queue's spin lock is already held.
+ */
+static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) {
+       assert(queue);
+       assert(resv);
+
+       queue_enter(queue, resv, pmc_reservation_t, link);
+}
+
+static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) {
+       assert(queue);
+       assert(resv);
+
+       queue_remove(queue, resv, pmc_reservation_t, link);
+}
+
+/* Returns TRUE if the reservation applies to the current execution context */
+static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) {
+       boolean_t ret = FALSE;
+       assert(resv);
+
+       if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) {
+               ret = TRUE;
+       } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) {
+               if(current_task() == resv->task) {
+                       ret = TRUE;
+               }
+       } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) {
+               if(current_thread() == resv->thread) {
+                       ret = TRUE;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * pmc_accessible_core_count returns the number of logical cores that can access
+ * a given @pmc.  0 means every core in the system.
+ */
+static uint32_t pmc_accessible_core_count(pmc_t pmc) {
+       assert(pmc);
+
+       uint32_t *cores = NULL;
+       size_t coreCt = 0UL;
+
+       if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object,
+               &cores, &coreCt)) {
+               coreCt = 0U;
+       }
+
+       return (uint32_t)coreCt;
+}
+
+/* spin lock for the queue must already be held */
+/*
+ * This method will inspect the task/thread of the reservation to see if it
+ * matches the new incoming one (for thread/task reservations only).  Will only
+ * return TRUE if the task/thread matches.
+ */
+static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) {
+       assert(queue);
+       assert(resv);
+
+       boolean_t ret = FALSE;
+       pmc_reservation_t tmp = NULL;
+
+       queue_iterate(queue, tmp, pmc_reservation_t, link) {
+               if(tmp) {
+                       if(tmp->pmc == resv->pmc) {
+                               /* PMC matches - make sure scope matches first */
+                               switch(PMC_FLAG_SCOPE(tmp->flags)) {
+                                       case PMC_FLAG_SCOPE_SYSTEM:
+                                               /*
+                                                * Found a reservation in system queue with same pmc - always a
+                                                * conflict.
+                                                */
+                                               ret = TRUE;
+                                               break;
+                                       case PMC_FLAG_SCOPE_THREAD:
+                                               /*
+                                                * Found one in thread queue with the same PMC as the
+                                                * argument. Only a conflict if argument scope isn't
+                                                * thread or system, or the threads match.
+                                                */
+                                               ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || 
+                                                       (tmp->thread == resv->thread);
+
+                                               if(!ret) {
+                                                       /*
+                                                        * so far, no conflict - check that the pmc that is
+                                                        * being reserved isn't accessible from more than
+                                                        * one core, if it is, we need to say it's already
+                                                        * taken.
+                                                        */
+                                                       if(1 != pmc_accessible_core_count(tmp->pmc)) {
+                                                               ret = TRUE;
+                                                       }
+                                               }
+                                               break;
+                                       case PMC_FLAG_SCOPE_TASK:
+                                               /* 
+                                                * Follow similar semantics for task scope.
+                                                */
+
+                                               ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
+                                                       (tmp->task == resv->task);
+                                               if(!ret) {
+                                                       /*
+                                                        * so far, no conflict - check that the pmc that is
+                                                        * being reserved isn't accessible from more than
+                                                        * one core, if it is, we need to say it's already
+                                                        * taken.
+                                                        */
+                                                       if(1 != pmc_accessible_core_count(tmp->pmc)) {
+                                                               ret = TRUE;
+                                                       }
+                                               }
+
+                                               break;
+                               }
+
+                               if(ret) break;
+                       }
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be 
+ * added to its target queue without creating conflicts (target queue is
+ * determined by the reservation's scope flags). Further, this method returns
+ * FALSE if any level contains a reservation for a PMC that can be accessed from
+ * more than just 1 core, and the given reservation also wants the same PMC.
+ */
+static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) {
+       assert(resv);
+       boolean_t ret = TRUE;
+
+       if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) ||
+               pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) ||
+               pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) {
+               ret = FALSE;
+       }
+
+       return ret;
+}
+
+static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) {
+       assert(thread);
+
+       /* See if this thread needs its PMC flag set */
+       pmc_reservation_t tmp = NULL;
+
+       if(!newFlag) {
+               /*
+                * If the parent task just dropped its reservation, iterate the thread
+                * reservations to see if we need to keep the pmc flag set for the given
+                * thread or not.
+                */
+               lck_spin_lock(&reservations_spin);
+       
+               queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) {
+                       if(tmp->thread == thread) {
+                               newFlag = TRUE;
+                               break;
+                       }
+               }
+
+               lck_spin_unlock(&reservations_spin);
+       }
+
+       if(newFlag) {
+               OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud);
+       } else {
+               OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud);
+       }
+}
+
+/* 
+ * This operation is (worst case) O(N*M), where N is the number of threads in the
+ * given task, and M is the number of thread reservations in our system.
+ */
+static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
+       assert(task);
+       thread_t thread = NULL;
+
+       if(newFlag) {
+               OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
+       } else {
+               OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
+       }
+
+       task_lock(task);
+
+       queue_iterate(&task->threads, thread, thread_t, task_threads) {
+               /* propagate the task's mask down to each thread  */
+               pmc_internal_update_thread_flag(thread, newFlag);
+       }
+
+       task_unlock(task);
+}
+
+/*
+ * pmc_internal_reservation_add adds a reservation to the global tracking queues after
+ * ensuring there are no reservation conflicts.  To do this, it takes all the
+ * spin locks for all the queue (to ensure no other core goes and adds a
+ * reservation for the same pmc to a queue that has already been checked).
+ */
+static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
+       assert(resv);
+
+       boolean_t ret = FALSE;
+
+       /* always lock all three in the same order */
+       lck_spin_lock(&reservations_spin);
+
+       /* Check if the reservation can be added without conflicts */
+       if(pmc_internal_reservation_validate_for_pmc(resv)) {
+               ret = TRUE;
+       }
+
+       if(ret) {
+               /* add reservation to appropriate scope */
+               switch(PMC_FLAG_SCOPE(resv->flags)) {
+
+                       /* System-wide counter */
+                       case PMC_FLAG_SCOPE_SYSTEM:
+                               /* Simply add it to the system queue */
+                               pmc_internal_reservation_enqueue(system_reservations, resv);
+                               
+                               lck_spin_unlock(&reservations_spin);
+
+                               break;
+
+                       /* Task-switched counter */
+                       case PMC_FLAG_SCOPE_TASK:
+                               assert(resv->task);
+
+                               /* Not only do we enqueue it in our local queue for tracking */
+                               pmc_internal_reservation_enqueue(task_reservations, resv);
+
+                               lck_spin_unlock(&reservations_spin);
+
+                               /* update the task mask, and propagate it to existing threads */
+                               pmc_internal_update_task_flag(resv->task, TRUE);
+                               break;
+
+                       /* Thread-switched counter */
+                       case PMC_FLAG_SCOPE_THREAD:
+                               assert(resv->thread);
+
+                               /*
+                                * Works the same as a task-switched counter, only at
+                                * thread-scope
+                                */
+
+                               pmc_internal_reservation_enqueue(thread_reservations, resv);
+
+                               lck_spin_unlock(&reservations_spin);
+                               
+                               pmc_internal_update_thread_flag(resv->thread, TRUE);
+                               break;
+                       }
+       } else {
+               lck_spin_unlock(&reservations_spin);
+       }                       
+       
+       return ret;
+}
+
+static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
+       uint32_t * cores;
+       size_t core_cnt;
+       
+       /* Get the list of accessible cores */
+       if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
+               boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);
+
+               /* Fast case: the PMC is only accessible from one core and we happen to be on it */
+               if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
+                       action_func(reservation);
+               } else {
+                       /* Call action_func on every accessible core */
+#if defined(__i386__) || defined(__x86_64__)
+                       size_t ii;
+                       cpumask_t mask = 0;
+                       
+                       /* Build a mask for the accessible cores */
+                       if (core_cnt > 0) {
+                               for (ii = 0; ii < core_cnt; ii++) {
+                                       mask |= cpu_to_cpumask(cores[ii]);
+                               }
+                       } else {
+                               /* core_cnt = 0 really means all cpus */
+                               mask = CPUMASK_ALL;
+                       }
+                       
+                       /* Have each accessible core run action_func asynchronously. */
+                       mp_cpus_call(mask, ASYNC, action_func, reservation);
+#elif defined(__ppc__)
+                       size_t ii;
+
+                       if (core_cnt > 0) {
+                               for (ii = 0; ii < core_cnt; ii++) {
+                                       if (cores[ii] == (uint32_t)cpu_number()) {
+                                               action_func(reservation);
+                                       } else {
+                                               cpu_signal(cores[ii], SIGPcall, (uint32_t)action_func, (uint32_t)reservation);
+                                       }
+                               }
+                       } else {
+                               uint32_t sync;
+                               cpu_broadcast(&sync, (void (*)(uint32_t))action_func, (uint32_t)reservation);
+                               action_func(reservation);
+                       }
+#else
+#error pmc_internal_reservation_broadcast needs an inter-processor method invocation mechanism for this architecture
+#endif
+               }
+
+               ml_set_interrupts_enabled(intrs_enabled);
+       }
+       
+}
+
+/*
+ * pmc_internal_reservation_remove removes the given reservation from the appropriate
+ * reservation queue according to its scope. 
+ *
+ * NOTE: The scope flag must have been set for this method to function.
+ */
+static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
+       assert(resv);
+
+       /*
+        * Due to the way the macros are written, we can't just blindly queue-remove
+        * the reservation without knowing which queue it's in. We figure this out
+        * using the reservation's scope flags.
+        */
+
+       switch(PMC_FLAG_SCOPE(resv->flags)) {
+
+               case PMC_FLAG_SCOPE_SYSTEM:
+                       lck_spin_lock(&reservations_spin);
+                       pmc_internal_reservation_dequeue(system_reservations, resv);
+                       lck_spin_unlock(&reservations_spin);
+                       break;
+
+               case PMC_FLAG_SCOPE_TASK:
+                       
+                       /* Lock the global spin lock */
+                       lck_spin_lock(&reservations_spin);
+
+                       /* remove from the global queue */
+                       pmc_internal_reservation_dequeue(task_reservations, resv);
+
+                       /* unlock the global */
+                       lck_spin_unlock(&reservations_spin);
+
+                       /* Recalculate task's counter mask */
+                       pmc_internal_update_task_flag(resv->task, FALSE);
+                       break;
+
+               case PMC_FLAG_SCOPE_THREAD:
+                       lck_spin_lock(&reservations_spin);
+
+                       pmc_internal_reservation_dequeue(thread_reservations, resv);
+
+                       lck_spin_unlock(&reservations_spin);
+
+                       /* recalculate the thread's counter mask */
+                       pmc_internal_update_thread_flag(resv->thread, FALSE);
+
+                       break;
+       }
+}
+
+/* Reservation State Machine
+ *
+ * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a 
+ * set of 9 events to provide MP-safe bookkeeping and control flow.  The 3-tuple is comprised 
+ * of a state, a count of active contexts, and a set of modifier flags.  A state machine defines
+ * the possible transitions at each event point given the current 3-tuple.  Atomicity is handled
+ * by reading the current 3-tuple, applying the transformations indicated by the state machine
+ * and then attempting to OSCompareAndSwap the transformed value.  If the OSCompareAndSwap fails,
+ * the process is repeated until either the OSCompareAndSwap succeeds or not valid transitions are
+ * available.
+ *
+ * The state machine is described using tuple notation for the current state and a related notation
+ * for describing the transformations.  For conciseness, the flag and state names are abbreviated as
+ * follows:
+ * 
+ * states:
+ * S = STOP
+ * CR = CAN_RUN
+ * L = LOAD
+ * R = RUN
+ * ST = STORE
+ * I = INTERRUPT
+ * D = DEALLOC
+ *
+ * flags:
+ *
+ * S = STOPPING
+ * D = DEALLOCING
+ * I = INTERRUPTING
+ *
+ * The tuple notation is formed from the following pattern:
+ *
+ * tuple = < state, active-context-count, flags >
+ * state = S | CR | L | R | ST | I | D
+ * active-context-count = 0 | >0 | 1 | >1
+ * flags = flags flag | blank
+ * flag = S | D | I
+ *
+ * The transform notation is similar, but only describes the modifications made to the current state.
+ * The notation is formed from the following pattern:
+ * 
+ * transform = < state, active-context-count, flags >
+ * state = S | CR | L | R | ST | I | D
+ * active-context-count = + | - | blank
+ * flags = flags flag | flags !flag | blank
+ * flag = S | D | I
+ *
+ * And now for the state machine:
+ * State               Start           Stop            Free            Interrupt               End Interrupt           Context In              Context Out     Load Finished           Store Finished
+ * <CR, 0, >                           <S, , >         <D, , >                 <L, +, >
+ * <D, 0, >
+ * <D, 1, D>                                                                   < , -, !D>
+ * <D, >1, D>                                                                  < , -, >
+ * <I, 0, D>                                                                   <D, , !D>
+ * <I, 0, S>   < , , !S>                               < , , !SD>              <S, , !S>
+ * <I, 0, >                                    < , , S>        < , , D>        <CR, , >
+ * <L, 1, D>                                                                   <ST, -, >
+ * <L, 1, ID>                                                                  <ST, -, >
+ * <L, 1, IS>                                                  < , , !SD>      <ST, -, >
+ * <L, 1, S>   < , , !S>                               < , , !SD>              <ST, -, >
+ * <L, 1, >                                    < , , S>        < , , D>        < , , IS>                                                       < , +, >        <R, , >
+ * <L, >1, D>                                                                  < , -, >                <R, -, >
+ * <L, >1, ID>                                                                 < , -, >                <R, -, >
+ * <L, >1, IS>                                                 < , , !SD>      < , -, >                <R, -, >
+ * <L, >1, S>  < , , !S>                               < , , !SD>              < , -, >                <R, -, >
+ * <L, >1, >                           < , , S>        < , , D>        < , , IS>                                                       < , +, >                < , -, >                <R, , >
+ * <R, 1, D>                                                                   <ST, -, >
+ * <R, 1, ID>                                                                  <ST, -, >
+ * <R, 1, IS>                                                  < , , !SD>      <ST, -, >
+ * <R, 1, S>   < , , !S>                               < , , !SD>              <ST, -, >
+ * <R, 1, >                                    < , , S>        < , , D>        < , , IS>                                                       < , +, >        <ST, -, >
+ * <R, >1, D>                                                                  < , -, >
+ * <R, >1, ID>                                                                 < , -, >
+ * <R, >1, IS>                                                 < , , !SD>      < , -, >
+ * <R, >1, S>  < , , !S>                               < , , !SD>              < , -, >
+ * <R, >1, >                           < , , S>        < , , D>        < , , IS>                                                       < , +, >                < , -, >
+ * <S, 0, >            <CR, , >                                <D, , >
+ * <S, 1, ID>                                                                  <I, -, !I>
+ * <S, 1, IS>                                                  < , , !SD>      <I, -, !I>
+ * <S, 1, S>   < , , !S>                               <D, , !SD>              < , -, !S>
+ * <S, 1, >                                    < , , S>        <D, , D>        <L, +, >                <CR, -, >
+ * <S, >1, ID>                                                                 < , -, >
+ * <S, >1, IS>                                                 < , , !SD>      < , -, >
+ * <S, >1, S>  < , , !S>                               <D, , !SD>              < , -, >
+ * <S, >1, >                           < , , S>        <D, , D>                <L, +, >                < , -, >
+ * <ST, 0, D>                                                                  <D, , !D>
+ * <ST, 0, ID>                                                                 <I, , !I>
+ * <ST, 0, IS>                                                 < , , !SD>      <I, , !I>
+ * <ST, 0, S>  < , , !S>                               < , , !SD>              <S, , !S>
+ * <ST, 0, >                           < , , S>        < , , D>        < , , IS>                                                       < , +, >                <CR, , >
+ * <ST, >0, D>                                                                 < , -, >                                                        <D, , >
+ * <ST, >0, ID>                                                                < , -, >                                                        <S, , >
+ * <ST, >0, IS>                                                        < , , !SD>                                                                              < , -, >                        <S, , >
+ * <ST, >0, S> < , , !S>                               < , , !SD>              < , -, >                                                        <S, , >
+ * <ST, >0, >                          < , , S>        < , , D>        < , , IS>                                                       < , +, >                < , -, >                        <L, , >
+ */
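+
+/*
+ * A minimal sketch of the read/transform/compare-and-swap retry loop described
+ * above (the surrounding context is hypothetical; the actual transitions are
+ * computed by pmc_internal_reservation_next_state below):
+ *
+ *     uint32_t oldState, newState;
+ *     do {
+ *             oldState = resv->state;
+ *             newState = pmc_internal_reservation_next_state(oldState, event);
+ *             if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_INVALID) {
+ *                     break;          (no valid transition for this event)
+ *             }
+ *     } while (!OSCompareAndSwap(oldState, newState, &(resv->state)));
+ */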
+
+static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) {
+       uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0);
+       
+       switch (event) {
+               case PMC_STATE_EVENT_START:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
+                                       new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
+                                       }
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_STOP:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
+                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
+                                               new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
+                                       }
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_FREE:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
+                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
+                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
+                                       } else {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
+                                       }
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_INTERRUPT:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
+                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0);
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_END_OF_INTERRUPT:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_CONTEXT_IN:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
+                                       new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0);
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
+                                       }
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_CONTEXT_OUT:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING);
+                                       } else {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
+                                       }                                       
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
+                                       } else {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING);
+                                       } else {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING);
+                                       } else {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
+                                               if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
+                                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0);
+                                               } else {
+                                                       new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
+                                               }
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
+                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
+                                       }
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_LOAD_FINISHED:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0);
+                                       } else {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
+                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0);
+                                       break;
+                       }
+                       break;
+               case PMC_STATE_EVENT_STORE_FINISHED:
+                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
+                                       } else {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING);
+                                       } else {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
+                                       } else {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
+                                       }
+                                       break;
+                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
+                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
+                                       } else {
+                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0);
+                                       }
+                                       break;
+                       }
+                       break;
+       }
+       
+       return new_state;
+}
+
+static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) {
+       pmc_state_t oldState;
+       pmc_state_t newState;
+
+       assert(reservation);
+       
+       /* Determine what state change, if any, we need to make.  Keep trying until we either
+        * succeed in making a transition or there is no valid move.
+        */
+       do {
+               oldState = reservation->state;
+               newState = pmc_internal_reservation_next_state(oldState, event);
+       } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state)));
+       
+       if (newState != PMC_STATE_INVALID) {
+               COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event));
+       } else {
+               COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event));
+       }
+       
+       if (old_state_out != NULL) {
+               *old_state_out = oldState;
+       }
+       
+       return newState;
+}
+                                       
+static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) {
+       assert(reservation);
+       pmc_state_t newState;
+       pmc_state_t oldState;
+
+       /* Clear that this reservation was active when this cpu did its last context in */
+       OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in));
+       
+       /* Move the state machine */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) {
+               return;
+       }
+       
+       /* Do any actions required based on the state change */
+       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) {
+               /* Just moved into STORE, so store the reservation. */
+               pmc_internal_reservation_store(reservation);
+       } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
+               /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
+               thread_wakeup((event_t)reservation);
+       }
+       
+}
+
+static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) {
+       assert(reservation);
+       pmc_state_t oldState;
+       pmc_state_t newState;
+       
+       /* Move the state machine */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) {
+               return;
+       }
+
+       /* Mark that the reservation was active when this cpu did its last context in */
+       OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
+               
+       /* Do any actions required based on the state change */
+       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) {
+               /* Just moved into LOAD, so load the reservation. */
+               pmc_internal_reservation_load(reservation);
+       }
+       
+}
+
+static void pmc_internal_reservation_store(pmc_reservation_t reservation) {
+       assert(reservation);
+       assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE);
+       
+       assert(reservation->pmc);
+       assert(reservation->config);
+
+       pmc_state_t newState;
+       kern_return_t ret = KERN_SUCCESS;
+       
+       pmc_t store_pmc = reservation->pmc;
+       pmc_object_t store_pmc_obj = store_pmc->object;
+       perf_monitor_t store_pm = store_pmc->monitor;
+
+       /*
+        * Instruct the Perf Monitor that contains this counter to disable
+        * this counter at the monitor (global) level.
+        */
+       ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1);
+       if(KERN_SUCCESS != ret) {
+               COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret);
+               return;
+       }
+
+       /* Instruct the counter to disable itself */
+       ret = store_pmc->methods.disable(store_pmc_obj);
+       if(KERN_SUCCESS != ret) {
+               COUNTER_DEBUG("  [error] disable: 0x%x\n", ret);
+       }
+
+       /*
+        * At this point, we're off the hardware, so we don't have to
+        * set_on_hardware(TRUE) if anything fails from here on.
+        */
+
+       /* store the counter value into the reservation's stored count */
+       ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value);
+       if(KERN_SUCCESS != ret) {
+               COUNTER_DEBUG("  [error] get_count: 0x%x\n", ret);
+               return;
+       }
+               
+       /* Advance the state machine now that the STORE is finished */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) {
+               return;
+       }
+
+       /* Do any actions required based on the state change */
+       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) {
+               /* Just moved into LOAD, so load the reservation. */
+               pmc_internal_reservation_load(reservation);
+       } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
+               /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
+               thread_wakeup((event_t)reservation);
+       }
+       
+}
+
+static void pmc_internal_reservation_load(pmc_reservation_t reservation) {
+       assert(reservation);
+       assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD);
+
+       pmc_state_t newState;
+       kern_return_t ret = KERN_SUCCESS;
+
+       assert(reservation->pmc);
+       assert(reservation->config);
+       
+       pmc_t load_pmc = reservation->pmc;
+       pmc_object_t load_pmc_obj = load_pmc->object;
+       perf_monitor_t load_pm = load_pmc->monitor;
+
+       /* Set the control register up with the stored configuration */
+       ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object);
+       if(KERN_SUCCESS != ret) {
+               COUNTER_DEBUG("  [error] set_config: 0x%x\n", ret);
+               return;
+       }
+
+       /* load the counter value */
+       ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value);
+       if(KERN_SUCCESS != ret) {
+               COUNTER_DEBUG("  [error] set_count: 0x%x\n", ret);
+               return;
+       }
+
+       /* Locally enable the counter */
+       ret = load_pmc->methods.enable(load_pmc_obj);
+       if(KERN_SUCCESS != ret) {
+               COUNTER_DEBUG("  [error] enable: 0x%x\n", ret);
+               return;
+       }
+
+       /*
+        * Instruct the Perf Monitor containing the pmc to enable the
+        * counter.
+        */
+       ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1);
+       if(KERN_SUCCESS != ret) {
+               COUNTER_DEBUG("  [error] enable_counters: 0x%x\n", ret);
+               /* not on the hardware. */
+               return;
+       }
+       
+       /* Advance the state machine now that the LOAD is finished */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) {
+               return;
+       }
+
+       /* Do any actions required based on the state change */
+       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) {
+               /* Just moved into STORE, so store the reservation. */
+               pmc_internal_reservation_store(reservation);
+       }
+       
+}
+
+static void pmc_internal_reservation_start_cpu(void * arg) {
+       pmc_reservation_t reservation = (pmc_reservation_t)arg;
+       
+       assert(reservation);
+       
+       if (pmc_internal_reservation_matches_context(reservation)) {
+               /* We are in context, but the reservation may have already had the context_in method run.  Attempt
+                * to set this cpu's bit in the active_last_context_in mask.  If we set it, call context_in.
+                */
+               uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
+               
+               if ((oldMask & (1U << cpu_number())) == 0) {
+                       COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number());
+                       
+                       pmc_internal_reservation_context_in(reservation);
+               }
+       }
+}
+
+static void pmc_internal_reservation_stop_cpu(void * arg) {
+       pmc_reservation_t reservation = (pmc_reservation_t)arg;
+       
+       assert(reservation);
+       
+       if (pmc_internal_reservation_matches_context(reservation)) {
+               COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number());
+
+               pmc_internal_reservation_context_out(reservation);
+       }
+}      
+
+/*!fn
+ * pmc_reservation_interrupt is called when a PMC reservation which was setup
+ * with an interrupt threshold counts the requested number of events. When the
+ * underlying counter hits the threshold, an interrupt is generated, and this
+ * method is called. This method marks the reservation as stopped, and passes
+ * control off to the user-registered callback method, along with the
+ * reservation (so that the user can, for example, write a 0 to the counter, and
+ * restart the reservation).
+ * This method assumes the reservation has a valid pmc_config_t within.
+ *
+ * @param target The pmc_reservation_t that caused the interrupt.
+ * @param refCon User specified reference constant.
+ */
+static void pmc_reservation_interrupt(void *target, void *refCon) {
+       pmc_reservation_t reservation = (pmc_reservation_t)target;
+       pmc_state_t newState;
+       uint64_t timeout;
+       uint32_t spins;
+
+       assert(reservation);
+
+       /* Move the state machine */
+       if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) {
+               return;
+       }
+
+       /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
+        * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
+        * on every cpu that can access the PMC.
+        */
+       pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
+                       
+       /* Spin waiting for the state to turn to INTERRUPT */
+       nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
+       timeout += mach_absolute_time();
+       spins = 0;
+       while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) {
+               /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
+               if (++spins > PMC_SPIN_THRESHOLD) {
+                       if (mach_absolute_time() > timeout) {
+                               pmc_spin_timeout_count++;
+                               assert(0);
+                       }
+               }
+
+               cpu_pause();
+       }
+                       
+       assert(reservation->config);
+       assert(reservation->config->method);                    
+               
+       /* Call the registered callback handler */
+#ifdef DEBUG_COUNTERS
+       uint64_t start = mach_absolute_time();
+#endif /* DEBUG_COUNTERS */
+       
+       (void)reservation->config->method(reservation, refCon);
+       
+#ifdef DEBUG_COUNTERS
+       uint64_t end = mach_absolute_time();
+       if((end - start) > 5000ULL) {
+               kprintf("%s - user method %p took %llu ns\n", __FUNCTION__, 
+                               reservation->config->method, (end - start));
+       }
+#endif
+       
+       /* Move the state machine */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) {
+               return;
+       }
+       
+       /* Do any post-move actions necessary */
+       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) {
+               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
+       } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
+               /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
+               thread_wakeup((event_t)reservation);
+       }
+}      
+
+/*
+ * Apple-private KPI for Apple kexts (IOProfileFamily) only
+ */
+
+#if 0
+#pragma mark -
+#pragma mark IOProfileFamily private KPI
+#endif
+
+/*
+ * perf_monitor_register registers a new Performance Monitor, and its associated
+ * callback methods.  The given perf_monitor_object_t is the first argument to
+ * each callback when they are called.
+ */
+kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
+       perf_monitor_methods_t *methods) {
+
+       COUNTER_DEBUG("registering perf monitor %p\n", monitor);
+
+       if(!monitor || !methods) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* Protect against out-of-date driver kexts */
+       if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* All methods are required */
+       if(!methods->supports_context_switching || !methods->enable_counters ||
+               !methods->disable_counters) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* prevent dupes. */
+       perf_monitor_t dupe = perf_monitor_find(monitor);
+       if(dupe) {
+               COUNTER_DEBUG("Duplicate registration for %p\n", monitor);
+               perf_monitor_deallocate(dupe);
+               return KERN_FAILURE;
+       }
+
+       perf_monitor_t pm = perf_monitor_alloc();
+       if(!pm) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       /* initialize the object */
+       perf_monitor_init(pm);
+
+       /* copy in the registration info */
+       pm->object = monitor;
+       memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t));
+
+       /* place it in the tracking queue */
+       perf_monitor_enqueue(pm);
+
+       /* debug it */
+       PRINT_PERF_MON(pm);
+
+       return KERN_SUCCESS;
+}
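+
+/*
+ * Illustrative sketch (not part of the original source): the general shape of
+ * a driver-side registration call against the KPI above.  Only the fields
+ * that perf_monitor_register() checks (the version and the three required
+ * callbacks) are shown; the my_pm_* callback names are placeholders, and
+ * pmc.h remains the authoritative reference for perf_monitor_methods_t.
+ */
+#if 0
+static kern_return_t my_driver_register_pm(perf_monitor_object_t my_pm_object) {
+       perf_monitor_methods_t methods;
+
+       bzero(&methods, sizeof(methods));
+
+       /* Must match the KPI version compiled into xnu, or registration is refused. */
+       methods.perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION;
+
+       /* All three callbacks are required. */
+       methods.supports_context_switching = my_pm_supports_context_switching;
+       methods.enable_counters = my_pm_enable_counters;
+       methods.disable_counters = my_pm_disable_counters;
+
+       return perf_monitor_register(my_pm_object, &methods);
+}
+#endif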
+
+/*
+ * perf_monitor_unregister unregisters a previously registered Perf Monitor,
+ * looking it up by reference pointer (the same that was used in
+ * perf_monitor_register()).
+ */
+kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) {
+       kern_return_t ret = KERN_FAILURE;
+
+       COUNTER_DEBUG("unregistering perf monitor %p\n", monitor);
+
+       if(!monitor) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       perf_monitor_t pm = perf_monitor_find(monitor);
+       if(pm) {
+               /* Remove it from the queue. */
+               perf_monitor_dequeue(pm);
+
+               /* drop extra retain from find */
+               perf_monitor_deallocate(pm);
+
+               /* and release the object */
+               perf_monitor_deallocate(pm);
+
+               ret = KERN_SUCCESS;
+       } else {
+               COUNTER_DEBUG("could not find a registered pm that matches!\n");
+       }
+
+       return ret;
+}
+
+/*
+ * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is
+ * associated with a Perf Monitor.  Perf Monitors are looked up by the reference
+ * pointer that was used to previously register them. 
+ *
+ * PMCs are registered with a reference pointer (@pmc_object), and a set of
+ * callback methods.  When the given callback methods are called from xnu, the
+ * first argument will always be the reference pointer used to register the PMC.
+ *
+ * NOTE: @monitor must have been successfully registered via
+ * perf_monitor_register before this method will succeed.
+ */
+kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object,
+       pmc_methods_t *methods, void *object) {
+
+       COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
+
+       if(!monitor || !pmc_object || !methods || !object) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* Prevent version mismatches */
+       if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) {
+               COUNTER_DEBUG("version mismatch\n");
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* All methods are required. */
+       if(!methods->create_config || 
+               !methods->free_config ||
+               !methods->config_set_value || 
+               !methods->config_set_threshold || 
+               !methods->config_set_handler ||
+               !methods->set_config || 
+               !methods->get_monitor || 
+               !methods->get_name ||
+               !methods->accessible_from_core || 
+               !methods->accessible_cores ||
+               !methods->get_count || 
+               !methods->set_count ||
+               !methods->disable ||
+               !methods->enable ||
+               !methods->open || 
+               !methods->close) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* make sure this perf monitor object is already registered */
+       /*
+        * NOTE: this adds a reference to the parent, so we'll have to drop it in
+        * any failure code paths from here on out.
+        */
+       perf_monitor_t pm = perf_monitor_find(monitor);
+       if(!pm) {
+               COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* make a new pmc */
+       pmc_t pmc = pmc_alloc();
+       if(!pmc) {
+               /* drop the extra reference from perf_monitor_find() */
+               perf_monitor_deallocate(pm);
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       /* init it */
+       pmc_init(pmc);
+
+       pmc->object = pmc_object;
+       pmc->open_object = object;
+
+       /* copy the callbacks in */
+       memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t));
+
+       pmc->monitor = pm;
+
+       perf_monitor_add_pmc(pmc->monitor, pmc);
+
+       /* enqueue it in our tracking queue */
+       pmc_enqueue(pmc);
+
+       /* drop extra reference from perf_monitor_find() */
+       perf_monitor_deallocate(pm);
+
+       return KERN_SUCCESS;
+}
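+
+/*
+ * Illustrative sketch (not part of the original source): the general shape of
+ * registering a single counter against the KPI above.  Only the version field
+ * and the callbacks checked as required by pmc_register() are shown; the
+ * my_pmc_* callback names are placeholders, and pmc.h remains the
+ * authoritative reference for pmc_methods_t.
+ */
+#if 0
+static kern_return_t my_driver_register_pmc(perf_monitor_object_t monitor,
+       pmc_object_t pmc_object, void *open_object) {
+
+       pmc_methods_t methods;
+
+       bzero(&methods, sizeof(methods));
+
+       /* Must match the KPI version compiled into xnu. */
+       methods.pmc_methods_version = MACH_PMC_METHODS_VERSION;
+
+       /* Every callback below is required, or pmc_register() fails. */
+       methods.create_config = my_pmc_create_config;
+       methods.free_config = my_pmc_free_config;
+       methods.config_set_value = my_pmc_config_set_value;
+       methods.config_set_threshold = my_pmc_config_set_threshold;
+       methods.config_set_handler = my_pmc_config_set_handler;
+       methods.set_config = my_pmc_set_config;
+       methods.get_monitor = my_pmc_get_monitor;
+       methods.get_name = my_pmc_get_name;
+       methods.accessible_from_core = my_pmc_accessible_from_core;
+       methods.accessible_cores = my_pmc_accessible_cores;
+       methods.get_count = my_pmc_get_count;
+       methods.set_count = my_pmc_set_count;
+       methods.disable = my_pmc_disable;
+       methods.enable = my_pmc_enable;
+       methods.open = my_pmc_open;
+       methods.close = my_pmc_close;
+
+       return pmc_register(monitor, pmc_object, &methods, open_object);
+}
+#endif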
+
+/*
+ * pmc_unregister unregisters a previously registered PMC, looking it up by
+ * reference pointer to *both* the Perf Monitor it was registered with and the
+ * PMC's own reference pointer.
+ */
+kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) {
+       COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
+
+       if(!monitor || !pmc_object) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       pmc_t pmc = pmc_find(pmc_object);
+       if(!pmc) {
+               COUNTER_DEBUG("Could not find a matching pmc.\n");
+               return KERN_FAILURE;
+       }
+
+       /* remove it from the global queue */
+       pmc_dequeue(pmc);
+
+       perf_monitor_remove_pmc(pmc->monitor, pmc);
+
+       /* remove extra reference count from pmc_find() */
+       pmc_deallocate(pmc);
+
+       /* dealloc the pmc */
+       pmc_deallocate(pmc);
+
+       return KERN_SUCCESS;
+}
+
+#if 0
+#pragma mark -
+#pragma mark KPI
+#endif
+
+/*
+ * Begin in-kernel and in-kext KPI methods
+ */
+
+/*
+ * pmc_create_config creates a new configuration area from a given @pmc.
+ *
+ * NOTE: This method is not interrupt safe.
+ */
+kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) {
+       pmc_config_t tmp = NULL;
+
+       if(!pmc || !config) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       pmc_reference(pmc);
+
+       tmp = pmc_config_alloc(pmc);
+       if(tmp) {
+               tmp->object = pmc->methods.create_config(pmc->object);
+
+               if(!tmp->object) {
+                       pmc_config_free(pmc, tmp);
+                       tmp = NULL;
+               } else {
+                       tmp->interrupt_after_value = 0ULL;
+                       tmp->method = NULL;
+                       tmp->refCon = NULL;
+               }
+       }
+
+       pmc_deallocate(pmc);
+
+       if(!tmp) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       *config = tmp;
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * pmc_free_config frees a configuration area created from a given @pmc
+ *
+ * NOTE: This method is not interrupt safe.
+ */
+void pmc_free_config(pmc_t pmc, pmc_config_t config) {
+       assert(pmc);
+       assert(config);
+
+       pmc_reference(pmc);
+
+       pmc_config_free(pmc, config);
+
+       pmc_deallocate(pmc);
+}
+
+/*
+ * pmc_config_set_value sets up configuration area key-value pairs.  These pairs
+ * are to be either pre-known, or looked up via CoreProfile.framework.
+ *
+ * NOTE: This method is not interrupt safe.
+ */
+kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config,
+       uint8_t id, uint64_t value) {
+
+       kern_return_t ret = KERN_INVALID_ARGUMENT;
+       
+       if(!pmc || !config) {
+               return ret;
+       }
+
+       pmc_reference(pmc);
+
+       ret = pmc->methods.config_set_value(config->object, id, value);
+
+       pmc_deallocate(pmc);
+
+       return ret;
+}
+
+/*
+ * pmc_config_set_interrupt_threshold modifies a config object, instructing
+ * the pmc that it should generate a call to the given pmc_interrupt_method_t
+ * after the counter counts @threshold events.
+ *
+ * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt
+ * as the first argument when the interrupt handler is invoked, and the given
+ * @refCon (which may be NULL) as the second.
+ *
+ * See pmc_interrupt_method_t.
+ *
+ * NOTE: This method is not interrupt safe.
+ */
+kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, 
+       uint64_t threshold, pmc_interrupt_method_t method, void *refCon) {
+       kern_return_t ret = KERN_INVALID_ARGUMENT;
+
+       if(!config || !pmc) {
+               return ret;
+       }
+       
+       assert(config);
+       assert(pmc);
+
+       pmc_reference(pmc);
+
+       do {
+               /*
+                * We have a minor annoyance to side-step here. The driver layer expects
+                * the config to never change once a reservation has been taken out with
+                * it.  However, in order to have the PMI method have the reservation as
+                * the first argument (in order to allow the user-method to, for
+                * example, write a 0 to it, and restart it), we need to create the
+                * pmc_reservation_t before setting it up in the config object.
+                * We overcome this by caching the method in the pmc_config_t stand-in,
+                * and mutating the pmc_config_object_t just before returning a
+                * reservation (in pmc_reserve() and friends, below).
+                */
+
+               /* might as well stash this away too. */
+               config->interrupt_after_value = threshold;
+               config->method = method;
+               config->refCon = refCon;
+
+               ret = KERN_SUCCESS;
+
+       }while(0);
+
+       pmc_deallocate(pmc);
+
+       return ret;
+}
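+
+/*
+ * Illustrative sketch (not part of the original source): a client-side PMI
+ * handler wired up through pmc_config_set_interrupt_threshold().  Per the
+ * contract described above, the handler receives the generating reservation
+ * first and the refCon second; the exact pmc_interrupt_method_t prototype is
+ * defined in pmc.h.  The handler name, helper name and threshold value are
+ * placeholders.
+ */
+#if 0
+static void my_pmi_handler(pmc_reservation_t reservation, void *refCon) {
+       (void)refCon;
+
+       /* Reset the count and let the counter run again, as suggested above. */
+       (void)pmc_reservation_write(reservation, 0ULL);
+       (void)pmc_reservation_start(reservation);
+}
+
+static kern_return_t my_setup_pmi(pmc_t pmc, pmc_config_t config) {
+       /* Request an interrupt after one million events; no refCon. */
+       return pmc_config_set_interrupt_threshold(pmc, config, 1000000ULL,
+               my_pmi_handler, NULL);
+}
+#endif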
+
+/*
+ * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number
+ * of pmc_t's returned. Callers should free this list with a call to
+ * pmc_free_pmc_list().
+ *
+ * NOTE: This method is not interrupt safe.
+ */
+kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
+       pmc_t *array = NULL;
+       pmc_t pmc = NULL;
+       size_t count = 0UL;
+       
+       do {
+               /* Copy down (to the stack) the count of perf counters */
+               vm_size_t size = perf_counters_count;
+
+               /* Allocate that sized chunk */
+               array = (pmc_t *)kalloc(sizeof(pmc_t) * size);
+               if(!array) {
+                       return KERN_RESOURCE_SHORTAGE;
+               }
+
+               /* Take the spin lock */
+               lck_spin_lock(&perf_counters_queue_spin);
+
+               /* verify the size didn't change while we were allocating */
+               if(size != perf_counters_count) {
+                       /*
+                        * queue size has changed between alloc and now - go back and
+                        * make another pass.
+                        */
+
+                       /* drop the lock */
+                       lck_spin_unlock(&perf_counters_queue_spin);
+
+                       /* free the block */
+                       kfree(array, sizeof(pmc_t) * size);
+                       array = NULL;
+               }
+
+               /* if we get here, and array is NULL, we try again. */
+       }while(!array);
+
+       /* copy the bits out */
+       queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
+               if(pmc) {
+                       /* copy out the pointer */
+                       array[count++] = pmc;
+               }
+       }
+
+       lck_spin_unlock(&perf_counters_queue_spin);
+
+       /* return the list and the size */
+       *pmcs = array;
+       *pmcCount = count;
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * pmc_free_pmc_list frees an array of pmc_t that has been returned from
+ * pmc_get_pmc_list.
+ * 
+ * NOTE: This method is not interrupt safe.
+ */
+void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) {
+       if(pmcs && pmcCount) {
+               COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount);
+
+               kfree(pmcs, pmcCount * sizeof(pmc_t));
+       }
+}
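+
+/*
+ * Illustrative sketch (not part of the original source): walking the array
+ * returned by pmc_get_pmc_list() and releasing it afterwards with
+ * pmc_free_pmc_list().  The helper name is a placeholder.
+ */
+#if 0
+static void my_dump_pmc_names(void) {
+       pmc_t *pmcs = NULL;
+       size_t count = 0UL, ii = 0UL;
+
+       if (KERN_SUCCESS != pmc_get_pmc_list(&pmcs, &count)) {
+               return;
+       }
+
+       for (ii = 0UL; ii < count; ii++) {
+               kprintf("pmc[%lu]: %s\n", ii, pmc_get_name(pmcs[ii]));
+       }
+
+       pmc_free_pmc_list(pmcs, count);
+}
+#endif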
+
+kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) {
+       kern_return_t ret = KERN_INVALID_ARGUMENT;
+
+       if(!name || !pmcs || !pmcCount) {
+               return ret;
+       }
+
+       pmc_t *list = NULL;
+       size_t count = 0UL;
+
+       if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) {
+               size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL;
+               size_t len = strlen(name);
+
+               for(ii = 0UL; ii < count; ii++) {
+                       const char *pmcName = pmc_get_name(list[ii]);
+
+                       if(strlen(pmcName) < len) {
+                               /*
+                                * If the pmc name is shorter than the requested match, it's no 
+                                * match, as we're looking for the most specific match(es).
+                                */
+                               continue;
+                       }
+
+                       if(0 == strncmp(name, pmcName, len)) {
+                               pmc_t temp = list[ii];
+                               
+                               // move matches to the head of the array.
+                               list[ii] = list[swapPtr];
+                               list[swapPtr] = temp;
+                               swapPtr++;
+
+                               // keep a count of the matches
+                               matchCount++;
+                       }
+               }
+
+               if(matchCount) {
+                       /*
+                        * If we have matches, they are all at the head of the array, so
+                        * just allocate enough space for @matchCount pmc_t's, and copy the
+                        * head of the array to the new allocation.  Then free the old
+                        * allocation.
+                        */
+
+                       pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount);
+                       if(result) {
+                               // copy the matches
+                               memcpy(result, list, sizeof(pmc_t) * matchCount);
+
+                               ret = KERN_SUCCESS;
+                       }
+
+                       pmc_free_pmc_list(list, count);
+
+                       if(!result) {
+                               *pmcs = NULL;
+                               *pmcCount = 0UL;
+                               return KERN_RESOURCE_SHORTAGE;
+                       }
+
+                       *pmcs = result;
+                       *pmcCount = matchCount;
+               } else {
+                       *pmcs = NULL;
+                       *pmcCount = 0UL;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * pmc_get_name returns a pointer (not copied) to the human-readable name of the
+ * given pmc.
+ *
+ * NOTE: Driver authors must take care not to allocate during this method, as
+ * this method *IS* interrupt safe.
+ */
+const char *pmc_get_name(pmc_t pmc) {
+       assert(pmc);
+
+       const char *name = pmc->methods.get_name(pmc->object);
+
+       return name;
+}
+
+/*
+ * pmc_get_accessible_core_list returns a pointer to an array of logical core
+ * numbers (as well as the size of that array) that represent the local cores
+ * (hardware threads) from which the given @pmc can be accessed directly.
+ *
+ * NOTE: This method is interrupt safe.
+ */
+kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores,
+       size_t *logicalCoreCt) {
+
+       kern_return_t ret = KERN_INVALID_ARGUMENT;
+
+       if(!pmc || !logicalCores || !logicalCoreCt) {
+               return ret;
+       }
+
+       ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt);
+
+       return ret;
+}
+
+/*
+ * pmc_accessible_from_core will return TRUE if the given @pmc is directly
+ * (e.g., hardware) readable from the given logical core.
+ *
+ * NOTE: This method is interrupt safe.
+ */
+boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
+       boolean_t ret = FALSE;
+
+       assert(pmc);
+
+       ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);
+
+       return ret;
+}
+
+static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) {
+       assert(resv);
+       assert(resv->pmc);
+       assert(config);
+       assert(config->object);
+
+       /* Only set up a PMI if one was requested; otherwise this is trivially successful */
+       if(config->interrupt_after_value && config->method) {
+
+               /* set the threshold */
+               kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object,
+                       config->interrupt_after_value);
+
+               if(KERN_SUCCESS != ret) {
+                       /*
+                        * This is the most useful error message here, as this only happens
+                        * as a result of pmc_reserve*()
+                        */
+                       COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc);
+                       return FALSE;
+               }
+
+               if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object, 
+                       (void *)resv, &pmc_reservation_interrupt, config->refCon)) {
+
+                       COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc);
+                       return FALSE;
+               }
+       }
+
+       return TRUE;
+}
+
+/*
+ * pmc_reserve will attempt to reserve the given @pmc, with a given
+ * configuration object, for counting system-wide. This method will fail with
+ * KERN_FAILURE if the given pmc is already reserved at any scope.
+ *
+ * This method consumes the given configuration object if it returns
+ * KERN_SUCCESS. Any other return value indicates the caller
+ * must free the config object via pmc_free_config().
+ *
+ * NOTE: This method is NOT interrupt safe.
+ */
+kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config,
+       pmc_reservation_t *reservation) {
+
+       if(!pmc || !config || !reservation) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       pmc_reservation_t resv = reservation_alloc();
+       if(!resv) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       reservation_init(resv);
+
+       resv->flags |= PMC_FLAG_SCOPE_SYSTEM;
+       resv->config = config;
+
+       if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
+               resv->config = NULL;
+               return KERN_FAILURE;
+       }
+       
+       /* enqueue reservation in proper place */
+       if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
+               /* Prevent free of config object */
+               resv->config = NULL;
+               
+               reservation_free(resv);
+               return KERN_FAILURE;
+       }
+
+       /* The PMI handler (if one was requested) was set up by pmc_reservation_setup_pmi() above */
+       
+       *reservation = resv;
+
+       return KERN_SUCCESS;
+}
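+
+/*
+ * Illustrative sketch (not part of the original source): an end-to-end
+ * system-scope reservation built from the KPI above - find a counter by
+ * name, configure it, and reserve it.  The config key/value pair is a
+ * placeholder (real ids come from the driver or CoreProfile.framework), as
+ * is the helper name.
+ */
+#if 0
+static kern_return_t my_reserve_first_match(const char *name, pmc_reservation_t *resv_out) {
+       pmc_t *pmcs = NULL;
+       size_t count = 0UL;
+       pmc_config_t config = NULL;
+       kern_return_t ret = KERN_FAILURE;
+
+       ret = pmc_find_by_name(name, &pmcs, &count);
+       if (KERN_SUCCESS != ret || 0UL == count) {
+               return KERN_FAILURE;
+       }
+
+       ret = pmc_create_config(pmcs[0], &config);
+
+       if (KERN_SUCCESS == ret) {
+               /* Placeholder event id and value. */
+               ret = pmc_config_set_value(pmcs[0], config, 0, 0ULL);
+       }
+
+       if (KERN_SUCCESS == ret) {
+               /* On success the config is consumed by the reservation. */
+               ret = pmc_reserve(pmcs[0], config, resv_out);
+       }
+
+       if (KERN_SUCCESS != ret && NULL != config) {
+               /* Any failure after creation means we still own the config. */
+               pmc_free_config(pmcs[0], config);
+       }
+
+       pmc_free_pmc_list(pmcs, count);
+
+       return ret;
+}
+#endif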
+
+/*
+ * pmc_reserve_task will attempt to reserve the given @pmc with a given
+ * configuration object, for counting when the given @task is running on any
+ * logical core that can directly access the given @pmc.  This method will fail
+ * with KERN_FAILURE if the given pmc is already reserved at either system or
+ * thread scope.  
+ *
+ * This method consumes the given configuration object if it returns
+ * KERN_SUCCESS. Any other return value indicates the caller
+ * must free the config object via pmc_free_config().
+ *
+ * NOTE: You can reserve the same pmc for N different tasks concurrently.
+ * NOTE: This method is NOT interrupt safe.
+ */
+kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, 
+       task_t task, pmc_reservation_t *reservation) {
+
+       if(!pmc || !config || !reservation || !task) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
+               COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       pmc_reservation_t resv = reservation_alloc();
+       if(!resv) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       reservation_init(resv);
+
+       resv->flags |= PMC_FLAG_SCOPE_TASK;
+       resv->task = task;
+
+       resv->config = config;
+
+       if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
+               resv->config = NULL;
+               return KERN_FAILURE;
+       }
+       
+       /* enqueue reservation in proper place */
+       if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
+               /* Prevent free of config object */
+               resv->config = NULL;
+
+               reservation_free(resv);
+               return KERN_FAILURE;
+       }
+
+       *reservation = resv;
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * pmc_reserve_thread will attempt to reserve the given @pmc with a given
+ * configuration object, for counting when the given @thread is running on any
+ * logical core that can directly access the given @pmc.  This method will fail
+ * with KERN_FAILURE if the given pmc is already reserved at either system or
+ * task scope.  
+ *
+ * This method consumes the given configuration object if it returns
+ * KERN_SUCCESS. Any other return value indicates the caller
+ * must free the config object via pmc_free_config().
+ *
+ * NOTE: You can reserve the same pmc for N different threads concurrently.
+ * NOTE: This method is NOT interrupt safe.
+ */
+kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, 
+       thread_t thread, pmc_reservation_t *reservation) {
+       if(!pmc || !config || !reservation || !thread) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
+               COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       pmc_reservation_t resv = reservation_alloc();
+       if(!resv) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       reservation_init(resv);
+
+       resv->flags |= PMC_FLAG_SCOPE_THREAD;
+       resv->thread = thread;
+
+       resv->config = config;
+
+       if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
+               resv->config = NULL;
+               return KERN_FAILURE;
+       }
+       
+       /* enqueue reservation in proper place */
+       if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
+               /* Prevent free of config object */
+               resv->config = NULL;
+
+               reservation_free(resv);
+               return KERN_FAILURE;
+       }
+
+       *reservation = resv;
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * pmc_reservation_start instructs the given reservation to start counting as
+ * soon as possible. 
+ *
+ * NOTE: This method is interrupt safe.
+ */
+kern_return_t pmc_reservation_start(pmc_reservation_t reservation) {
+       pmc_state_t newState;
+
+       if(!reservation) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* Move the state machine */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) {
+               return KERN_FAILURE;
+       }
+       
+       /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
+        * broadcast right before it leaves
+        */
+       if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) {   
+               /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
+                * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu
+                * on every cpu that can access the PMC.
+                */
+               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
+       }
+       
+       return KERN_SUCCESS;                     
+}
+
+/*
+ * pmc_reservation_stop instructs the given reservation to stop counting as
+ * soon as possible.  When this method returns, the pmc will be marked as stopping
+ * and subsequent calls to pmc_reservation_start will succeed.  This does not mean
+ * that the pmc hardware has _actually_ stopped running.  Assuming no other changes
+ * to the reservation state, the pmc hardware _will_ stop shortly.
+ *
+ */
+kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) {
+       pmc_state_t newState;
+
+       if(!reservation) {
+               return KERN_INVALID_ARGUMENT;
+       }
+       
+       /* Move the state machine */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) {
+               return KERN_FAILURE;
+       }
+       
+       /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
+        * broadcast right before it leaves.  Similarly, if we just moved directly to STOP, don't bother broadcasting.
+        */
+       if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) {      
+               /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
+                * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
+                * on every cpu that can access the PMC.
+                */
+               
+               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
+       }
+       
+       return KERN_SUCCESS;
+}
+
+/*
+ * pmc_reservation_read will read the event count associated with a reservation.
+ * If the caller is currently executing in a context that both a) matches the
+ * reservation's context, and b) can access the reservation's pmc directly, the
+ * value will be read from hardware.  Otherwise, this returns the reservation's
+ * stored value.
+ *
+ * NOTE: This method is interrupt safe.
+ * NOTE: When not on the interrupt stack, this method may block.
+ */
+kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) {
+       kern_return_t ret = KERN_FAILURE;
+       uint64_t timeout;
+       uint32_t spins;
+
+       if(!reservation || !value) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
+       timeout += mach_absolute_time();
+       spins = 0;
+       do {
+               uint32_t state = reservation->state;
+               
+               if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
+                       /* Attempt read from hardware via drivers. */
+
+                       assert(reservation->pmc);
+
+                       ret = reservation->pmc->methods.get_count(reservation->pmc->object, value);
+                       
+                       break;
+               } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
+                                  (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
+                       /* Spin */
+                       /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
+                       if (++spins > PMC_SPIN_THRESHOLD) {
+                               if (mach_absolute_time() > timeout) {
+                                       pmc_spin_timeout_count++;
+                                       assert(0);
+                               }
+                       }
+
+                       cpu_pause();
+               } else {
+                       break;
+               }
+       } while (1);
+
+       /* If the direct hardware read failed (for whatever reason) */
+       if(KERN_SUCCESS != ret) {
+               /* Read stored value */
+               *value = reservation->value;
+       }
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * pmc_reservation_write will write the event count associated with a reservation.
+ * If the caller is currently executing in a context that both a) matches the
+ * reservation's context, and b) can access the reservation's pmc directly, the
+ * value will be written to hardware.  Otherwise, this writes the reservation's
+ * stored value.
+ *
+ * NOTE: This method is interrupt safe.
+ * NOTE: When not on the interrupt stack, this method may block.
+ */
+kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) {
+       kern_return_t ret = KERN_FAILURE;
+       uint64_t timeout;
+       uint32_t spins;
+
+       if(!reservation) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
+       timeout += mach_absolute_time();
+       spins = 0;
+       do {
+               uint32_t state = reservation->state;
+               
+               if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
+                       /* Write to hardware via drivers. */
+                       assert(reservation->pmc);
+
+                       ret = reservation->pmc->methods.set_count(reservation->pmc->object, value);
+                       break;
+               } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
+                                  (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
+                       /* Spin */
+                       /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
+                       if (++spins > PMC_SPIN_THRESHOLD) {
+                               if (mach_absolute_time() > timeout) {
+                                       pmc_spin_timeout_count++;
+                                       assert(0);
+                               }
+                       }
+
+                       cpu_pause();
+               } else {
+                       break;
+               }
+       } while (1);
+       
+       if(KERN_SUCCESS != ret) {
+               /* Write stored value */
+               reservation->value = value;
+       }
+
+       return KERN_SUCCESS;
+}
+
+/* 
+ * pmc_reservation_free releases a reservation and all associated resources.
+ *
+ * NOTE: This method is NOT interrupt safe.
+ */
+kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
+       pmc_state_t newState;
+       
+       if(!reservation) {
+               return KERN_INVALID_ARGUMENT;
+       }
+       
+       /* Move the state machine */
+       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) {
+               return KERN_FAILURE;
+       }
+
+       /* If we didn't move directly to DEALLOC, help things along */  
+       if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) {     
+               /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
+                * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
+                * on every cpu that can access the PMC.
+                */
+               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
+       }
+
+       /* Block until the reservation hits the <DEALLOC, 0, > state */
+       while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) {
+               assert_wait((event_t)reservation, THREAD_UNINT);
+               thread_block(THREAD_CONTINUE_NULL);
+       }
+
+       /* remove from queues */
+       pmc_internal_reservation_remove(reservation);
+               
+       /* free reservation */
+       reservation_free(reservation);
+
+       return KERN_SUCCESS;
+}
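+
+/*
+ * Illustrative sketch (not part of the original source): tearing down a
+ * reservation - stop it, read the final count, then release it.  Because
+ * pmc_reservation_free() can block, this must not be called from interrupt
+ * context.  The helper name is a placeholder.
+ */
+#if 0
+static uint64_t my_finish_reservation(pmc_reservation_t resv) {
+       uint64_t value = 0ULL;
+
+       (void)pmc_reservation_stop(resv);
+       (void)pmc_reservation_read(resv, &value);
+       (void)pmc_reservation_free(resv);
+
+       return value;
+}
+#endif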
+
+/*
+ * pmc_context_switch performs all context switching necessary to save all pmc
+ * state associated with @oldThread (and the task to which @oldThread belongs),
+ * as well as to restore all pmc state associated with @newThread (and the task
+ * to which @newThread belongs).
+ *
+ * NOTE: This method IS interrupt safe.
+ */
+boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) {
+       pmc_reservation_t resv = NULL;
+       uint32_t cpuNum = cpu_number();
+
+       /* Outgoing thread: save pmc state */
+       lck_spin_lock(&reservations_spin);
+
+       /* iterate over any reservations */
+       queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
+               if(resv && oldThread == resv->thread) {
+
+                       /* check if we can read the associated pmc from this core. */
+                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
+                               /* Save the state.  At this point, if it fails, it fails. */
+                               (void)pmc_internal_reservation_context_out(resv);
+                       }
+               }
+       }
+       
+       queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
+               if(resv && resv->task == oldThread->task) {
+                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
+                               (void)pmc_internal_reservation_context_out(resv);
+                       }
+               }
+       }
+       
+       /* Incoming thread: restore pmc state */
+
+       queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
+               if(resv && resv->thread == newThread) {
+                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
+                               (void)pmc_internal_reservation_context_in(resv);
+                       }
+               }
+       }
+       
+
+       queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
+               if(resv && resv->task == newThread->task) {
+                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
+                               (void)pmc_internal_reservation_context_in(resv);
+                       }
+               }
+       }
+       
+       lck_spin_unlock(&reservations_spin);
+
+       return TRUE;
+}
+
+#else /* !CONFIG_COUNTERS */
+
+#if 0
+#pragma mark -
+#pragma mark Dummy functions
+#endif
+
+/*
+ * In the case that someone has chosen not to include the PMC KPI in some
+ * configuration, we still have exports for kexts, so we'll need to define stub
+ * methods that return failures.
+ */
+kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused,
+       perf_monitor_methods_t *methods __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_register(perf_monitor_object_t monitor __unused, 
+       pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused,
+       pmc_object_t pmc __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_create_config(pmc_t pmc __unused, 
+       pmc_config_t *config __unused) {
+       return KERN_FAILURE;
+}
+
+void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) {
+}
+
+kern_return_t pmc_config_set_value(pmc_t pmc __unused, 
+       pmc_config_t config __unused, uint8_t id __unused, 
+       uint64_t value __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused, 
+       pmc_config_t config __unused, uint64_t threshold __unused, 
+       pmc_interrupt_method_t method __unused, void *refCon __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) {
+       return KERN_FAILURE;
+}
+
+void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) {
+}
+
+kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused, 
+       size_t *pmcCount __unused) {
+       return KERN_FAILURE;
+}
+
+const char *pmc_get_name(pmc_t pmc __unused) {
+       return "";
+}
+
+kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused, 
+       uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) {
+       return KERN_FAILURE;
+}
+
+boolean_t pmc_accessible_from_core(pmc_t pmc __unused, 
+       uint32_t logicalCore __unused) {
+       return FALSE;
+}
+
+kern_return_t pmc_reserve(pmc_t pmc __unused, 
+       pmc_config_t config __unused, pmc_reservation_t *reservation __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_reserve_task(pmc_t pmc __unused, 
+       pmc_config_t config __unused, task_t task __unused, 
+       pmc_reservation_t *reservation __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_reserve_thread(pmc_t pmc __unused, 
+       pmc_config_t config __unused, thread_t thread __unused, 
+       pmc_reservation_t *reservation __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused, 
+       uint64_t *value __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused, 
+       uint64_t value __unused) {
+       return KERN_FAILURE;
+}
+
+kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) {
+       return KERN_FAILURE;
+}
+
+
+#endif /* !CONFIG_COUNTERS */