+
+/*
+ * A single cross-cpu call request, queued on a target cpu's call queue.
+ * maskp, when non-NULL, points at the caller's response mask; the target
+ * cpu sets its own bit there on completion (NULL for NOSYNC requests,
+ * which want no acknowledgement).
+ */
+typedef struct {
+ queue_chain_t link; /* queue linkage */
+ void (*func)(void *,void *); /* routine to call */
+ void *arg0; /* routine's 1st arg */
+ void *arg1; /* routine's 2nd arg */
+ cpumask_t *maskp; /* completion response mask */
+} mp_call_t;
+
+
+/* A queue of mp_call_t buffers, protected by a simple lock. */
+typedef struct {
+ queue_head_t queue;
+ decl_simple_lock_data(, lock);
+} mp_call_queue_t;
+#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
+/* Shared free list of call buffers (seeded by mp_cpus_call_cpu_init()) */
+static mp_call_queue_t mp_cpus_call_freelist;
+/* Per-cpu queues of incoming, not-yet-serviced call requests */
+static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
+
+/*
+ * Acquire a call queue's lock with interrupts disabled.
+ * Returns the prior interrupt state, which the caller hands back
+ * to mp_call_head_unlock() when releasing.
+ */
+static inline boolean_t
+mp_call_head_lock(mp_call_queue_t *cqp)
+{
+ boolean_t prior_state = ml_set_interrupts_enabled(FALSE);
+
+ simple_lock(&cqp->lock);
+ return prior_state;
+}
+
+/*
+ * Deliver an NMI IPI to each processor in cpu_mask to cause it to panic,
+ * then spin -- bounded by LockTimeOut -- waiting for each target to
+ * acknowledge. Unresponsive cpus are simply abandoned when the
+ * deadline passes.
+ */
+void
+NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why) {
+ unsigned int cpu, cpu_bit;
+ uint64_t deadline;
+
+ NMIPI_enable(TRUE);
+ NMI_panic_reason = why; /* record the reason before any NMI lands */
+
+ for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+ if ((cpu_mask & cpu_bit) == 0)
+ continue;
+ cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
+ cpu_NMI_interrupt(cpu);
+ }
+
+ /* Wait (only so long) for NMI'ed cpus to respond */
+ deadline = mach_absolute_time() + LockTimeOut;
+ for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+ if ((cpu_mask & cpu_bit) == 0)
+ continue;
+ while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
+ mach_absolute_time() < deadline) {
+ cpu_pause();
+ }
+ }
+}
+
+#if MACH_ASSERT
+/*
+ * Debug-only check used in assertions: TRUE iff interrupts are
+ * disabled and the queue's simple lock is currently held.
+ */
+static inline boolean_t
+mp_call_head_is_locked(mp_call_queue_t *cqp)
+{
+ if (ml_get_interrupts_enabled())
+ return FALSE;
+ return hw_lock_held((hw_lock_t)&cqp->lock);
+}
+#endif
+
+/*
+ * Release a call queue's lock and then restore the interrupt state
+ * previously returned by the matching mp_call_head_lock().
+ */
+static inline void
+mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
+{
+ simple_unlock(&cqp->lock);
+ ml_set_interrupts_enabled(intrs_enabled);
+}
+
+/*
+ * Pop a call buffer from the shared free list.
+ * Returns NULL when the free list is empty.
+ */
+static inline mp_call_t *
+mp_call_alloc(void)
+{
+ mp_call_queue_t *freeq = &mp_cpus_call_freelist;
+ mp_call_t *buf = NULL;
+ boolean_t state;
+
+ state = mp_call_head_lock(freeq);
+ if (!queue_empty(&freeq->queue))
+ queue_remove_first(&freeq->queue, buf, typeof(buf), link);
+ mp_call_head_unlock(freeq, state);
+
+ return buf;
+}
+
+/* Push a call buffer back onto the head of the shared free list. */
+static inline void
+mp_call_free(mp_call_t *callp)
+{
+ mp_call_queue_t *freeq = &mp_cpus_call_freelist;
+ boolean_t state = mp_call_head_lock(freeq);
+
+ queue_enter_first(&freeq->queue, callp, typeof(callp), link);
+ mp_call_head_unlock(freeq, state);
+}
+
+/*
+ * Remove and return the oldest pending call from a queue, or NULL
+ * if the queue is empty. The caller must hold the queue lock.
+ */
+static inline mp_call_t *
+mp_call_dequeue_locked(mp_call_queue_t *cqp)
+{
+ mp_call_t *head = NULL;
+
+ assert(mp_call_head_is_locked(cqp));
+ if (!queue_empty(&cqp->queue))
+ queue_remove_first(&cqp->queue, head, typeof(head), link);
+
+ return head;
+}
+
+/*
+ * Append a call request to the tail of a queue.
+ * The caller must hold the queue lock -- asserted here for
+ * consistency with mp_call_dequeue_locked(); the only call site
+ * (mp_cpus_call1) enqueues under mp_call_head_lock().
+ */
+static inline void
+mp_call_enqueue_locked(
+ mp_call_queue_t *cqp,
+ mp_call_t *callp)
+{
+ assert(mp_call_head_is_locked(cqp));
+ queue_enter(&cqp->queue, callp, typeof(callp), link);
+}
+
+/* Called on the boot processor to initialize global structures */
+static void
+mp_cpus_call_init(void)
+{
+ mp_call_queue_t *freeq = &mp_cpus_call_freelist;
+
+ DBG("mp_cpus_call_init()\n");
+ queue_init(&freeq->queue);
+ simple_lock_init(&freeq->lock, 0);
+}
+
+/*
+ * Called at processor registration to add call buffers to the free list
+ * and to initialize the per-cpu call queue.
+ */
+void
+mp_cpus_call_cpu_init(int cpu)
+{
+ int i;
+ mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
+ mp_call_t *callp;
+
+ simple_lock_init(&cqp->lock, 0);
+ queue_init(&cqp->queue);
+ for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
+ callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
+ mp_call_free(callp); /* seed the shared free list */
+ }
+
+ /* Fixed: message previously named mp_cpus_call_init(), the wrong
+ * function, which was misleading in debug traces. */
+ DBG("mp_cpus_call_cpu_init(%d) done\n", cpu);
+}
+
+/*
+ * This is called from cpu_signal_handler() to process an MP_CALL signal.
+ * And also from i386_deactivate_cpu() when a cpu is being taken offline.
+ *
+ * Drains this cpu's call queue: each request is copied to the stack,
+ * its buffer returned to the free list, the function run with the
+ * queue lock dropped, and completion acknowledged by setting this
+ * cpu's bit in the requester's response mask (if one was supplied).
+ */
+static void
+mp_cpus_call_action(void)
+{
+ mp_call_queue_t *cqp;
+ boolean_t intrs_enabled;
+ mp_call_t *callp;
+ mp_call_t call;
+
+ assert(!ml_get_interrupts_enabled());
+ cqp = &mp_cpus_call_head[cpu_number()];
+ intrs_enabled = mp_call_head_lock(cqp);
+ while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
+ /* Copy call request to the stack to free buffer */
+ call = *callp;
+ mp_call_free(callp);
+ if (call.func != NULL) {
+ /* Drop the queue lock while the function runs */
+ mp_call_head_unlock(cqp, intrs_enabled);
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPUS_CALL_ACTION,
+ VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
+ VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
+ call.func(call.arg0, call.arg1);
+ (void) mp_call_head_lock(cqp);
+ }
+ /* NULL maskp means NOSYNC: requester wants no acknowledgement */
+ if (call.maskp != NULL)
+ i_bit_set(cpu_number(), call.maskp);
+ }
+ mp_call_head_unlock(cqp, intrs_enabled);
+}
+
+/*
+ * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
+ * Possible modes are:
+ * SYNC: function is called serially on target cpus in logical cpu order
+ * waiting for each call to be acknowledged before proceeding
+ * ASYNC: function call is queued to the specified cpus
+ * waiting for all calls to complete in parallel before returning
+ * NOSYNC: function calls are queued
+ * but we return before confirmation of calls completing.
+ * The action function may be NULL.
+ * The cpu mask may include the local cpu. Offline cpus are ignored.
+ * The return value is the number of cpus on which the call was made or queued.
+ */
+cpu_t
+mp_cpus_call(
+ cpumask_t cpus,
+ mp_sync_t mode,
+ void (*action_func)(void *),
+ void *arg)
+{
+ /* Single-argument convenience wrapper: forward to mp_cpus_call1()
+ * with arg1 and the called-cpus out-mask unused. The function
+ * pointer cast relies on the one-arg function ignoring arg1. */
+ return mp_cpus_call1(
+ cpus,
+ mode,
+ (void (*)(void *,void *))action_func,
+ arg,
+ NULL,
+ NULL);
+}
+
+/*
+ * Spin until every cpu in cpus_called has set its bit in *cpus_responded.
+ * While spinning with interrupts disabled, pending cross-calls and cpu
+ * signals addressed to us are polled and serviced here, since the normal
+ * interrupt path cannot run. If the spin exceeds the timeout, the
+ * unresponsive cpus are NMI'ed and the system panics.
+ */
+static void
+mp_cpus_call_wait(boolean_t intrs_enabled,
+ cpumask_t cpus_called,
+ cpumask_t *cpus_responded)
+{
+ mp_call_queue_t *cqp;
+ uint64_t tsc_spin_start;
+
+ /* Spinning is only safe if we can't be preempted off this cpu */
+ assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+ cqp = &mp_cpus_call_head[cpu_number()];
+
+ tsc_spin_start = rdtsc64();
+ while (*cpus_responded != cpus_called) {
+ if (!intrs_enabled) {
+ /* Sniffing w/o locking */
+ if (!queue_empty(&cqp->queue))
+ mp_cpus_call_action();
+ cpu_signal_handler(NULL);
+ }
+ if (mp_spin_timeout(tsc_spin_start)) {
+ cpumask_t cpus_unresponsive;
+
+ cpus_unresponsive = cpus_called & ~(*cpus_responded);
+ NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
+ panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
+ cpus_unresponsive);
+ }
+ }
+}
+
+/*
+ * Full form of mp_cpus_call(): the action function takes two arguments
+ * and, if cpus_calledp is non-NULL, the mask of cpus on which the call
+ * was made or queued is returned through it. See the block comment
+ * above mp_cpus_call() for the SYNC/ASYNC/NOSYNC mode semantics.
+ * Returns the number of cpus called (including the local cpu, if in
+ * the mask).
+ */
+cpu_t
+mp_cpus_call1(
+ cpumask_t cpus,
+ mp_sync_t mode,
+ void (*action_func)(void *, void *),
+ void *arg0,
+ void *arg1,
+ cpumask_t *cpus_calledp)
+{
+ cpu_t cpu = 0;
+ boolean_t intrs_enabled = FALSE;
+ boolean_t call_self = FALSE;
+ cpumask_t cpus_called = 0;
+ cpumask_t cpus_responded = 0;
+ long cpus_call_count = 0;
+ uint64_t tsc_spin_start;
+ boolean_t topo_lock;
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPUS_CALL | DBG_FUNC_START,
+ cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
+
+ /* Before SMP is up, only a local call is possible */
+ if (!smp_initialized) {
+ if ((cpus & CPUMASK_SELF) == 0)
+ goto out;
+ if (action_func != NULL) {
+ intrs_enabled = ml_set_interrupts_enabled(FALSE);
+ action_func(arg0, arg1);
+ ml_set_interrupts_enabled(intrs_enabled);
+ }
+ call_self = TRUE;
+ goto out;
+ }
+
+ /*
+ * Queue the call for each non-local requested cpu.
+ * This is performed under the topo lock to prevent changes to
+ * cpus online state and to prevent concurrent rendezvouses --
+ * although an exception is made if we're calling only the master
+ * processor since that always remains active. Note: this exception
+ * is expected for longterm timer nosync cross-calls to the master cpu.
+ */
+ mp_disable_preemption();
+ intrs_enabled = ml_get_interrupts_enabled();
+ topo_lock = (cpus != cpu_to_cpumask(master_cpu));
+ if (topo_lock) {
+ ml_set_interrupts_enabled(FALSE);
+ (void) mp_safe_spin_lock(&x86_topo_lock);
+ }
+ for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
+ if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
+ !cpu_is_running(cpu))
+ continue;
+ tsc_spin_start = rdtsc64();
+ if (cpu == (cpu_t) cpu_number()) {
+ /*
+ * We don't IPI ourself and if calling asynchronously,
+ * we defer our call until we have signalled all others.
+ */
+ call_self = TRUE;
+ if (mode == SYNC && action_func != NULL) {
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPUS_CALL_LOCAL,
+ VM_KERNEL_UNSLIDE(action_func),
+ VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
+ action_func(arg0, arg1);
+ }
+ } else {
+ /*
+ * Here to queue a call to cpu and IPI.
+ */
+ mp_call_t *callp = NULL;
+ mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
+ boolean_t intrs_inner;
+
+ queue_call:
+ if (callp == NULL)
+ callp = mp_call_alloc();
+ intrs_inner = mp_call_head_lock(cqp);
+ if (callp == NULL) {
+ /* No free buffer: drop the lock, help drain
+ * pending work, and retry until timeout. */
+ mp_call_head_unlock(cqp, intrs_inner);
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPUS_CALL_NOBUF,
+ cpu, 0, 0, 0, 0);
+ if (!intrs_inner) {
+ /* Sniffing w/o locking */
+ if (!queue_empty(&cqp->queue))
+ mp_cpus_call_action();
+ handle_pending_TLB_flushes();
+ }
+ if (mp_spin_timeout(tsc_spin_start))
+ panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
+ tsc_spin_start, rdtsc64());
+ goto queue_call;
+ }
+ /* NOSYNC callers don't wait, so no response mask */
+ callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
+ callp->func = action_func;
+ callp->arg0 = arg0;
+ callp->arg1 = arg1;
+ mp_call_enqueue_locked(cqp, callp);
+ cpus_call_count++;
+ cpus_called |= cpu_to_cpumask(cpu);
+ i386_signal_cpu(cpu, MP_CALL, ASYNC);
+ mp_call_head_unlock(cqp, intrs_inner);
+ if (mode == SYNC) {
+ mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
+ }
+ }
+ }
+ if (topo_lock) {
+ simple_unlock(&x86_topo_lock);
+ ml_set_interrupts_enabled(intrs_enabled);
+ }
+
+ /* Call locally if mode not SYNC */
+ if (mode != SYNC && call_self ) {
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPUS_CALL_LOCAL,
+ VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
+ if (action_func != NULL) {
+ ml_set_interrupts_enabled(FALSE);
+ action_func(arg0, arg1);
+ ml_set_interrupts_enabled(intrs_enabled);
+ }
+ }
+
+ /* For ASYNC, now wait for all signaled cpus to complete their calls */
+ if (mode == ASYNC)
+ mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
+
+ /* Safe to allow pre-emption now */
+ mp_enable_preemption();
+
+out:
+ if (call_self){
+ /*
+ * Account for the local call. Fixed: 'cpu' has run past
+ * real_ncpus after the loop above (and is 0 in the
+ * pre-SMP path), so the calling cpu's bit must be
+ * derived from cpu_number(), not from 'cpu' -- the old
+ * code set a spurious bit in the returned mask.
+ */
+ cpus_called |= cpu_to_cpumask(cpu_number());
+ cpus_call_count++;
+ }
+
+ if (cpus_calledp)
+ *cpus_calledp = cpus_called;
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPUS_CALL | DBG_FUNC_END,
+ cpus_call_count, cpus_called, 0, 0, 0);
+
+ return (cpu_t) cpus_call_count;
+}
+
+
+/*
+ * Per-cpu executor for mp_broadcast(): runs the staged action function
+ * and, when this is the last cpu through (mp_bc_count hits zero),
+ * wakes the instigating thread blocked in mp_broadcast().
+ */
+static void
+mp_broadcast_action(__unused void *null)
+{
+ /* call action function */
+ if (mp_bc_action_func != NULL)
+ mp_bc_action_func(mp_bc_func_arg);
+
+ /* if we're the last one through, wake up the instigator */
+ if (atomic_decl_and_test(&mp_bc_count, 1))
+ thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
+}
+
+/*
+ * mp_broadcast() runs a given function on all active cpus.
+ * The caller blocks until the function has run on all cpus.
+ * The caller will also block if there is another pending broadcast
+ * (mp_bc_lock serializes broadcasts).
+ */
+void
+mp_broadcast(
+ void (*action_func)(void *),
+ void *arg)
+{
+ /* Before SMP is up, only this cpu exists: call directly */
+ if (!smp_initialized) {
+ if (action_func != NULL)
+ action_func(arg);
+ return;
+ }
+
+ /* obtain broadcast lock */
+ lck_mtx_lock(&mp_bc_lock);
+
+ /* set static function pointers */
+ mp_bc_action_func = action_func;
+ mp_bc_func_arg = arg;
+
+ /* assert the wait before signalling, so no wakeup can be missed */
+ assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
+
+ /*
+ * signal other processors, which will call mp_broadcast_action()
+ */
+ mp_bc_count = real_ncpus; /* assume max possible active */
+ mp_bc_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, *mp_broadcast_action, NULL) + 1;
+ atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus); /* subtract inactive */
+
+ /* call executor function on this cpu */
+ mp_broadcast_action(NULL);
+
+ /* block for other cpus to have run action_func */
+ if (mp_bc_ncpus > 1)
+ thread_block(THREAD_CONTINUE_NULL);
+ else
+ clear_wait(current_thread(), THREAD_AWAKENED);
+
+ /* release lock */
+ lck_mtx_unlock(&mp_bc_lock);
+}
+
+/*
+ * Send a KICK lapic IPI to every running cpu in the given mask,
+ * excluding the caller's own cpu. Performed under the topo lock
+ * with interrupts disabled.
+ */
+void
+mp_cpus_kick(cpumask_t cpus)
+{
+ cpu_t cpu;
+ boolean_t intrs_enabled;
+
+ intrs_enabled = ml_set_interrupts_enabled(FALSE);
+ mp_safe_spin_lock(&x86_topo_lock);
+
+ for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
+ if (cpu == (cpu_t) cpu_number())
+ continue; /* never kick ourself */
+ if ((cpu_to_cpumask(cpu) & cpus) == 0)
+ continue; /* not requested */
+ if (!cpu_is_running(cpu))
+ continue; /* offline */
+ lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
+ }
+
+ simple_unlock(&x86_topo_lock);
+ ml_set_interrupts_enabled(intrs_enabled);
+}
+