+
+typedef struct {
+        queue_chain_t link;             /* queue linkage */
+        void (*func)(void *, void *);   /* routine to call */
+        void *arg0;                     /* routine's 1st arg */
+        void *arg1;                     /* routine's 2nd arg */
+        volatile long *countp;          /* completion counter */
+} mp_call_t;
+
+
+typedef struct {
+        queue_head_t queue;
+        decl_simple_lock_data(, lock);
+} mp_call_queue_t;
+#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
+static mp_call_queue_t mp_cpus_call_freelist;
+static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
+
+static inline boolean_t
+mp_call_head_lock(mp_call_queue_t *cqp)
+{
+        boolean_t intrs_enabled;
+
+        intrs_enabled = ml_set_interrupts_enabled(FALSE);
+        simple_lock(&cqp->lock);
+
+        return intrs_enabled;
+}
+
+static inline boolean_t
+mp_call_head_is_locked(mp_call_queue_t *cqp)
+{
+        return !ml_get_interrupts_enabled() &&
+               hw_lock_held((hw_lock_t)&cqp->lock);
+}
+
+static inline void
+mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
+{
+        simple_unlock(&cqp->lock);
+        ml_set_interrupts_enabled(intrs_enabled);
+}
+
+static inline mp_call_t *
+mp_call_alloc(void)
+{
+        mp_call_t *callp = NULL;
+        boolean_t intrs_enabled;
+        mp_call_queue_t *cqp = &mp_cpus_call_freelist;
+
+        intrs_enabled = mp_call_head_lock(cqp);
+        if (!queue_empty(&cqp->queue))
+                queue_remove_first(&cqp->queue, callp, typeof(callp), link);
+        mp_call_head_unlock(cqp, intrs_enabled);
+
+        return callp;
+}
+
+static inline void
+mp_call_free(mp_call_t *callp)
+{
+        boolean_t intrs_enabled;
+        mp_call_queue_t *cqp = &mp_cpus_call_freelist;
+
+        intrs_enabled = mp_call_head_lock(cqp);
+        queue_enter_first(&cqp->queue, callp, typeof(callp), link);
+        mp_call_head_unlock(cqp, intrs_enabled);
+}
+
+static inline mp_call_t *
+mp_call_dequeue_locked(mp_call_queue_t *cqp)
+{
+        mp_call_t *callp = NULL;
+
+        assert(mp_call_head_is_locked(cqp));
+        if (!queue_empty(&cqp->queue))
+                queue_remove_first(&cqp->queue, callp, typeof(callp), link);
+        return callp;
+}
+
+static inline void
+mp_call_enqueue_locked(
+        mp_call_queue_t *cqp,
+        mp_call_t *callp)
+{
+        queue_enter(&cqp->queue, callp, typeof(callp), link);
+}
+
+/* Called on the boot processor to initialize global structures */
+static void
+mp_cpus_call_init(void)
+{
+        mp_call_queue_t *cqp = &mp_cpus_call_freelist;
+
+        DBG("mp_cpus_call_init()\n");
+        simple_lock_init(&cqp->lock, 0);
+        queue_init(&cqp->queue);
+}
+
+/*
+ * Called by each processor to add call buffers to the free list
+ * and to initialize the per-cpu call queue.
+ * Also called on slave processors at restart/wake, where it returns
+ * early because the per-cpu queue is already initialized.
+ */
+static void
+mp_cpus_call_cpu_init(void)
+{
+        int i;
+        mp_call_queue_t *cqp = &mp_cpus_call_head[cpu_number()];
+        mp_call_t *callp;
+
+        if (cqp->queue.next != NULL)
+                return;         /* restart/wake case: called already */
+
+        simple_lock_init(&cqp->lock, 0);
+        queue_init(&cqp->queue);
+        for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
+                callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
+                mp_call_free(callp);
+        }
+
+ DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
+}
+
+/*
+ * This is called from cpu_signal_handler() to process an MP_CALL signal,
+ * and also from i386_deactivate_cpu() when a cpu is being taken offline.
+ */
+static void
+mp_cpus_call_action(void)
+{
+        mp_call_queue_t *cqp;
+        boolean_t intrs_enabled;
+        mp_call_t *callp;
+        mp_call_t call;
+
+        assert(!ml_get_interrupts_enabled());
+        cqp = &mp_cpus_call_head[cpu_number()];
+        intrs_enabled = mp_call_head_lock(cqp);
+        while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
+                /* Copy call request to the stack to free buffer */
+                call = *callp;
+                mp_call_free(callp);
+                if (call.func != NULL) {
+                        mp_call_head_unlock(cqp, intrs_enabled);
+                        KERNEL_DEBUG_CONSTANT(
+                            TRACE_MP_CPUS_CALL_ACTION,
+                            call.func, call.arg0, call.arg1, call.countp, 0);
+                        call.func(call.arg0, call.arg1);
+                        (void) mp_call_head_lock(cqp);
+                }
+                if (call.countp != NULL)
+                        atomic_incl(call.countp, 1);
+        }
+        mp_call_head_unlock(cqp, intrs_enabled);
+}
+
+/*
+ * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
+ * Possible modes are:
+ *   SYNC:   function is called serially on target cpus in logical cpu order
+ *           waiting for each call to be acknowledged before proceeding
+ *   ASYNC:  function call is queued to the specified cpus
+ *           waiting for all calls to complete in parallel before returning
+ *   NOSYNC: function calls are queued
+ *           but we return before confirmation of calls completing.
+ * The action function may be NULL.
+ * The cpu mask may include the local cpu. Offline cpus are ignored.
+ * The return value is the number of cpus on which the call was made or queued.
+ */
+cpu_t
+mp_cpus_call(
+        cpumask_t cpus,
+        mp_sync_t mode,
+        void (*action_func)(void *),
+        void *arg)
+{
+        return mp_cpus_call1(
+            cpus,
+            mode,
+            (void (*)(void *, void *)) action_func,
+            arg,
+            NULL,
+            NULL,
+            NULL);
+}
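+
+/*
+ * Illustrative usage sketch (editor's addition, not part of this change):
+ * how a caller might reset a hypothetical per-cpu statistic on every online
+ * cpu and wait for completion.  Only mp_cpus_call(), cpu_t, CPUMASK_ALL,
+ * SYNC, MAX_CPUS, cpu_number() and DBG() are assumed available from this
+ * file/mp.h; example_stat[] and the example_* functions are invented here.
+ */
+#if 0   /* example only, not built */
+static uint64_t example_stat[MAX_CPUS];
+
+static void
+example_reset_stat(void *arg)
+{
+        /* Runs on each selected cpu; arg carries the value to store */
+        example_stat[cpu_number()] = (uint64_t)(uintptr_t) arg;
+}
+
+static void
+example_reset_all_stats(void)
+{
+        cpu_t ncpus;
+
+        /* SYNC: returns only after every online cpu has run the function */
+        ncpus = mp_cpus_call(CPUMASK_ALL, SYNC,
+                             example_reset_stat, (void *) 0);
+        DBG("example_reset_all_stats() ran on %d cpus\n", (int) ncpus);
+}
+#endif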
+
+static void
+mp_cpus_call_wait(boolean_t intrs_enabled,
+                  long mp_cpus_signals,
+                  volatile long *mp_cpus_calls)
+{
+        mp_call_queue_t *cqp;
+        uint64_t tsc_spin_start;
+
+        cqp = &mp_cpus_call_head[cpu_number()];
+
+        tsc_spin_start = rdtsc64();
+        while (*mp_cpus_calls < mp_cpus_signals) {
+                if (!intrs_enabled) {
+                        /* Sniffing w/o locking */
+                        if (!queue_empty(&cqp->queue))
+                                mp_cpus_call_action();
+                        handle_pending_TLB_flushes();
+                }
+                mp_spin_timeout_check(tsc_spin_start, "mp_cpus_call_wait()");
+        }
+}
+
+cpu_t
+mp_cpus_call1(
+        cpumask_t cpus,
+        mp_sync_t mode,
+        void (*action_func)(void *, void *),
+        void *arg0,
+        void *arg1,
+        cpumask_t *cpus_calledp,
+        cpumask_t *cpus_notcalledp)
+{
+        cpu_t cpu;
+        boolean_t intrs_enabled = FALSE;
+        boolean_t call_self = FALSE;
+        cpumask_t cpus_called = 0;
+        cpumask_t cpus_notcalled = 0;
+        long mp_cpus_signals = 0;
+        volatile long mp_cpus_calls = 0;
+        uint64_t tsc_spin_start;
+
+        KERNEL_DEBUG_CONSTANT(
+            TRACE_MP_CPUS_CALL | DBG_FUNC_START,
+            cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
+
+        if (!smp_initialized) {
+                if ((cpus & CPUMASK_SELF) == 0)
+                        goto out;
+                if (action_func != NULL) {
+                        intrs_enabled = ml_set_interrupts_enabled(FALSE);
+                        action_func(arg0, arg1);
+                        ml_set_interrupts_enabled(intrs_enabled);
+                }
+                call_self = TRUE;
+                goto out;
+        }
+
+        /*
+         * Queue the call for each non-local requested cpu.
+         * The topo lock is not taken. Instead we sniff the cpu_running state
+         * and then re-check it after taking the call lock. A cpu being taken
+         * offline runs the action function after clearing cpu_running.
+         */
+        mp_disable_preemption();        /* interrupts may be enabled */
+        tsc_spin_start = rdtsc64();
+        for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
+                if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
+                    !cpu_datap(cpu)->cpu_running)
+                        continue;
+                if (cpu == (cpu_t) cpu_number()) {
+                        /*
+                         * We don't IPI ourselves; if calling asynchronously,
+                         * we defer our own call until all others have been
+                         * signalled.
+                         */
+                        call_self = TRUE;
+                        cpus_called |= cpu_to_cpumask(cpu);
+                        if (mode == SYNC && action_func != NULL) {
+                                KERNEL_DEBUG_CONSTANT(
+                                    TRACE_MP_CPUS_CALL_LOCAL,
+                                    VM_KERNEL_UNSLIDE(action_func),
+                                    arg0, arg1, 0, 0);
+                                action_func(arg0, arg1);
+                        }
+                } else {
+                        /*
+                         * Queue a call to this cpu and send it an IPI,
+                         * spinning for a request buffer unless mode is NOSYNC.
+                         */
+                        mp_call_t *callp = NULL;
+                        mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
+
+                queue_call:
+                        if (callp == NULL)
+                                callp = mp_call_alloc();
+                        intrs_enabled = mp_call_head_lock(cqp);
+                        if (!cpu_datap(cpu)->cpu_running) {
+                                mp_call_head_unlock(cqp, intrs_enabled);
+                                continue;
+                        }
+                        if (mode == NOSYNC) {
+                                if (callp == NULL) {
+                                        cpus_notcalled |= cpu_to_cpumask(cpu);
+                                        mp_call_head_unlock(cqp, intrs_enabled);
+                                        KERNEL_DEBUG_CONSTANT(
+                                            TRACE_MP_CPUS_CALL_NOBUF,
+                                            cpu, 0, 0, 0, 0);
+                                        continue;
+                                }
+                                callp->countp = NULL;
+                        } else {
+                                if (callp == NULL) {
+                                        mp_call_head_unlock(cqp, intrs_enabled);
+                                        KERNEL_DEBUG_CONSTANT(
+                                            TRACE_MP_CPUS_CALL_NOBUF,
+                                            cpu, 0, 0, 0, 0);
+                                        if (!intrs_enabled) {
+                                                /* Sniffing w/o locking */
+                                                if (!queue_empty(&cqp->queue))
+                                                        mp_cpus_call_action();
+                                                handle_pending_TLB_flushes();
+                                        }
+                                        mp_spin_timeout_check(
+                                            tsc_spin_start,
+                                            "mp_cpus_call1()");
+                                        goto queue_call;
+                                }
+                                callp->countp = &mp_cpus_calls;
+                        }
+                        callp->func = action_func;
+                        callp->arg0 = arg0;
+                        callp->arg1 = arg1;
+                        mp_call_enqueue_locked(cqp, callp);
+                        mp_cpus_signals++;
+                        cpus_called |= cpu_to_cpumask(cpu);
+                        i386_signal_cpu(cpu, MP_CALL, ASYNC);
+                        mp_call_head_unlock(cqp, intrs_enabled);
+                        if (mode == SYNC) {
+                                mp_cpus_call_wait(intrs_enabled,
+                                    mp_cpus_signals, &mp_cpus_calls);
+                        }
+                }
+        }
+
+        /* Call locally if mode not SYNC */
+        if (mode != SYNC && call_self) {
+                KERNEL_DEBUG_CONSTANT(
+                    TRACE_MP_CPUS_CALL_LOCAL,
+                    VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
+                if (action_func != NULL) {
+                        ml_set_interrupts_enabled(FALSE);
+                        action_func(arg0, arg1);
+                        ml_set_interrupts_enabled(intrs_enabled);
+                }
+        }
+
+        /* Safe to allow pre-emption now */
+        mp_enable_preemption();
+
+        /* For ASYNC, now wait for all signaled cpus to complete their calls */
+        if (mode == ASYNC) {
+                mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
+        }
+
+out:
+        cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);
+
+        if (cpus_calledp)
+                *cpus_calledp = cpus_called;
+        if (cpus_notcalledp)
+                *cpus_notcalledp = cpus_notcalled;
+
+        KERNEL_DEBUG_CONSTANT(
+            TRACE_MP_CPUS_CALL | DBG_FUNC_END,
+            cpu, cpus_called, cpus_notcalled, 0, 0);
+
+        return cpu;
+}
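+
+/*
+ * Illustrative usage sketch (editor's addition, not part of this change):
+ * a hypothetical fire-and-forget poke using NOSYNC, retrying synchronously
+ * any cpu that was skipped because no call buffer was free (reported via
+ * the cpus_notcalledp mask).  Only mp_cpus_call1(), cpumask_t, SYNC and
+ * NOSYNC are assumed from this file/mp.h; the example_* names are invented.
+ */
+#if 0   /* example only, not built */
+static void
+example_poke(void *arg0, void *arg1)
+{
+        (void) arg0;
+        (void) arg1;
+}
+
+static void
+example_poke_cpus(cpumask_t targets)
+{
+        cpumask_t notcalled = 0;
+
+        /* Queue without waiting; offline cpus are simply skipped */
+        (void) mp_cpus_call1(targets, NOSYNC,
+                             example_poke, NULL, NULL,
+                             NULL, &notcalled);
+        if (notcalled != 0) {
+                /* No free buffer for these cpus: retry, spinning for one */
+                (void) mp_cpus_call1(notcalled, SYNC,
+                                     example_poke, NULL, NULL,
+                                     NULL, NULL);
+        }
+}
+#endif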
+
+
+static void
+mp_broadcast_action(void)
+{
+        /* call action function */
+        if (mp_bc_action_func != NULL)
+                mp_bc_action_func(mp_bc_func_arg);
+
+        /* if we're the last one through, wake up the instigator */
+        if (atomic_decl_and_test(&mp_bc_count, 1))
+                thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
+}
+
+/*
+ * mp_broadcast() runs a given function on all active cpus.
+ * The caller blocks until the function has run on all cpus.
+ * The caller will also block if there is another pending broadcast.
+ */