+ simple_unlock(&mp_rv_lock);
+}
+
+/*
+ * Unconditionally re-initialize mp_rv_lock, whatever state it is in.
+ */
+void
+mp_rendezvous_break_lock(void)
+{
+ simple_lock_init(&mp_rv_lock, 0);
+}
+
+/*
+ * Setup routine handed to mp_rendezvous() by mp_rendezvous_no_intrs():
+ * turns interrupts off on the calling cpu and records the previous
+ * interrupt-enable state in that cpu's cpu_iflag. The argument is unused.
+ */
+static void
+setup_disable_intrs(__unused void * param_not_used)
+{
+	/* Interrupts go off first, ahead of the first barrier */
+	const boolean_t	was_enabled = ml_set_interrupts_enabled(FALSE);
+	cpu_data_t	*cdp = current_cpu_datap();
+
+	cdp->cpu_iflag = was_enabled;
+	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
+}
+
+/*
+ * Teardown routine handed to mp_rendezvous() by mp_rendezvous_no_intrs():
+ * puts the interrupt-enable state back to the value held in this cpu's
+ * cpu_iflag. The argument is unused.
+ */
+static void
+teardown_restore_intrs(__unused void * param_not_used)
+{
+	/* Reinstate the saved interrupt flag once MTRR changes are done */
+	const boolean_t	saved_iflag = current_cpu_datap()->cpu_iflag;
+
+	ml_set_interrupts_enabled(saved_iflag);
+	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
+}
+
+/*
+ * Run action_func(arg) through mp_rendezvous() with interrupts disabled:
+ * setup_disable_intrs() is supplied as the setup step and
+ * teardown_restore_intrs() as the teardown step.
+ * This is exported for use by kexts.
+ */
+void
+mp_rendezvous_no_intrs(
+	void (*action_func)(void *),
+	void *arg)
+{
+	mp_rendezvous(setup_disable_intrs, action_func, teardown_restore_intrs, arg);
+}
+
+
+/*
+ * One queued cross-call request. mp_cpus_call1() fills a buffer in and
+ * enqueues it on the target cpu's queue; mp_cpus_call_action() copies it,
+ * frees the buffer, runs func(arg0, arg1) if func is non-NULL, and then
+ * sets its own cpu's bit in *maskp if maskp is non-NULL.
+ */
+typedef struct {
+ queue_chain_t link; /* queue linkage */
+ void (*func)(void *, void *); /* routine to call */
+ void *arg0; /* routine's 1st arg */
+ void *arg1; /* routine's 2nd arg */
+ cpumask_t *maskp; /* completion response mask */
+} mp_call_t;
+
+
+/* A queue of mp_call_t buffers together with the simple lock guarding it. */
+typedef struct {
+ queue_head_t queue;
+ decl_simple_lock_data(, lock);
+} mp_call_queue_t;
+/* Number of call buffers mp_cpus_call_cpu_init() adds to the free list per cpu */
+#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
+/* Pool of unused call buffers, shared by all cpus */
+static mp_call_queue_t mp_cpus_call_freelist;
+/* Pending cross-call queues, indexed by cpu number */
+static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
+
+/*
+ * Disable interrupts, then take the given call queue's lock.
+ * Returns the interrupt-enable state that was in effect on entry so the
+ * caller can hand it back to mp_call_head_unlock().
+ */
+static inline boolean_t
+mp_call_head_lock(mp_call_queue_t *cqp)
+{
+	const boolean_t	prev_state = ml_set_interrupts_enabled(FALSE);
+
+	simple_lock(&cqp->lock, LCK_GRP_NULL);
+	return prev_state;
+}
+
+/*
+ * Deliver an NMIPI to a set of processors to cause them to panic.
+ *
+ * cpu_mask selects the target cpus (bit N = cpu N); why is recorded in
+ * NMI_panic_reason before any NMI is sent. After signalling, waits for each
+ * target to set cpu_NMI_acknowledged, giving up LockTimeOut after the
+ * sends completed.
+ */
+void
+NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why)
+{
+	unsigned int	target;
+	cpumask_t	bit;
+	uint64_t	give_up_at;
+
+	NMIPI_enable(TRUE);
+	NMI_panic_reason = why;
+
+	/* First pass: clear the acknowledgement and NMI each selected cpu */
+	for (target = 0, bit = 1; target < real_ncpus; target++, bit <<= 1) {
+		if ((cpu_mask & bit) != 0) {
+			cpu_datap(target)->cpu_NMI_acknowledged = FALSE;
+			cpu_NMI_interrupt(target);
+		}
+	}
+
+	/* Second pass: wait (only so long) for NMI'ed cpus to respond */
+	give_up_at = mach_absolute_time() + LockTimeOut;
+	for (target = 0, bit = 1; target < real_ncpus; target++, bit <<= 1) {
+		if ((cpu_mask & bit) == 0) {
+			continue;
+		}
+		for (;;) {
+			if (cpu_datap(target)->cpu_NMI_acknowledged) {
+				break;
+			}
+			if (mach_absolute_time() >= give_up_at) {
+				break;
+			}
+			cpu_pause();
+		}
+	}
+}
+
+#if MACH_ASSERT
+/*
+ * Assertion helper: TRUE only when interrupts are disabled on this cpu and
+ * the queue's lock is reported held by hw_lock_held().
+ */
+static inline boolean_t
+mp_call_head_is_locked(mp_call_queue_t *cqp)
+{
+	if (ml_get_interrupts_enabled()) {
+		return FALSE;
+	}
+	return hw_lock_held((hw_lock_t)&cqp->lock) ? TRUE : FALSE;
+}
+#endif
+
+/*
+ * Release a call queue's lock, then set the interrupt-enable state to
+ * intrs_enabled (callers in this file pass the value previously returned
+ * by mp_call_head_lock()).
+ */
+static inline void
+mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
+{
+ simple_unlock(&cqp->lock);
+ ml_set_interrupts_enabled(intrs_enabled);
+}
+
+/*
+ * Take one call buffer off the front of the free list.
+ * Returns NULL when the free list is empty.
+ */
+static inline mp_call_t *
+mp_call_alloc(void)
+{
+	mp_call_queue_t	*freelist = &mp_cpus_call_freelist;
+	mp_call_t	*buf = NULL;
+	boolean_t	saved_intrs;
+
+	saved_intrs = mp_call_head_lock(freelist);
+	if (!queue_empty(&freelist->queue)) {
+		queue_remove_first(&freelist->queue, buf, typeof(buf), link);
+	}
+	mp_call_head_unlock(freelist, saved_intrs);
+
+	return buf;
+}
+
+/*
+ * Return a call buffer to the front of the free list.
+ */
+static inline void
+mp_call_free(mp_call_t *callp)
+{
+	mp_call_queue_t	*freelist = &mp_cpus_call_freelist;
+	boolean_t	saved_intrs = mp_call_head_lock(freelist);
+
+	queue_enter_first(&freelist->queue, callp, typeof(callp), link);
+	mp_call_head_unlock(freelist, saved_intrs);
+}
+
+/*
+ * Remove and return the first request on a call queue, or NULL if the
+ * queue is empty. The caller must already hold the queue's lock.
+ */
+static inline mp_call_t *
+mp_call_dequeue_locked(mp_call_queue_t *cqp)
+{
+	mp_call_t	*first = NULL;
+
+	assert(mp_call_head_is_locked(cqp));
+	if (queue_empty(&cqp->queue)) {
+		return NULL;
+	}
+	queue_remove_first(&cqp->queue, first, typeof(first), link);
+	return first;
+}
+
+/*
+ * Append a request to the tail of a call queue.
+ * The caller must already hold the queue's lock (taken with
+ * mp_call_head_lock()); this is asserted, matching
+ * mp_call_dequeue_locked().
+ */
+static inline void
+mp_call_enqueue_locked(
+	mp_call_queue_t *cqp,
+	mp_call_t *callp)
+{
+	assert(mp_call_head_is_locked(cqp));
+	queue_enter(&cqp->queue, callp, typeof(callp), link);
+}
+
+/*
+ * Called on the boot processor to initialize global structures:
+ * sets up the lock and the (empty) queue of the call-buffer free list.
+ */
+static void
+mp_cpus_call_init(void)
+{
+	DBG("mp_cpus_call_init()\n");
+	simple_lock_init(&mp_cpus_call_freelist.lock, 0);
+	queue_init(&mp_cpus_call_freelist.queue);
+}
+
+/*
+ * Called at processor registration to add call buffers to the free list
+ * and to initialize the per-cpu call queue.
+ *
+ * cpu indexes mp_cpus_call_head[] and so must lie in [0, MAX_CPUS).
+ * MP_CPUS_CALL_BUFS_PER_CPU permanently-allocated buffers are added to the
+ * shared free list on each call.
+ */
+void
+mp_cpus_call_cpu_init(int cpu)
+{
+	int i;
+	mp_call_queue_t *cqp;
+	mp_call_t *callp;
+
+	/* Catch an out-of-range cpu before it indexes past the queue array */
+	assert(cpu >= 0 && cpu < MAX_CPUS);
+	cqp = &mp_cpus_call_head[cpu];
+
+	simple_lock_init(&cqp->lock, 0);
+	queue_init(&cqp->queue);
+	for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
+		callp = zalloc_permanent_type(mp_call_t);
+		mp_call_free(callp);
+	}
+
+	/* Report this function's own name; the trace used to name mp_cpus_call_init */
+	DBG("%s(%d) done\n", __FUNCTION__, cpu);
+}
+
+/*
+ * This is called from cpu_signal_handler() to process an MP_CALL signal.
+ * And also from i386_deactivate_cpu() when a cpu is being taken offline.
+ *
+ * Drains the calling cpu's call queue. Each request is copied to the stack
+ * and its buffer returned to the free list before the requested function
+ * runs; the queue lock is dropped around the function call and retaken
+ * afterwards. Must be entered with interrupts disabled (asserted).
+ */
+static void
+mp_cpus_call_action(void)
+{
+ mp_call_queue_t *cqp;
+ boolean_t intrs_enabled;
+ mp_call_t *callp;
+ mp_call_t call;
+
+ assert(!ml_get_interrupts_enabled());
+ cqp = &mp_cpus_call_head[cpu_number()];
+ /* Given the assertion above, the state saved here is "disabled" */
+ intrs_enabled = mp_call_head_lock(cqp);
+ while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
+ /* Copy call request to the stack to free buffer */
+ call = *callp;
+ mp_call_free(callp);
+ if (call.func != NULL) {
+ /* Run the function without holding the queue lock */
+ mp_call_head_unlock(cqp, intrs_enabled);
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPUS_CALL_ACTION,
+ VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
+ VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
+ call.func(call.arg0, call.arg1);
+ /* Retake the lock; the saved state from the first lock is kept */
+ (void) mp_call_head_lock(cqp);
+ }
+ /* Report completion by setting this cpu's bit in the requester's mask */
+ if (call.maskp != NULL) {
+ i_bit_set(cpu_number(), call.maskp);
+ }
+ }
+ mp_call_head_unlock(cqp, intrs_enabled);
+}
+
+/*
+ * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
+ *
+ * Modes:
+ *	SYNC:   the function is called serially on the target cpus, in logical
+ *	        cpu order, waiting for each call to be acknowledged before
+ *	        moving on to the next.
+ *	ASYNC:  the call is queued to every specified cpu, then all calls are
+ *	        waited for in parallel before returning.
+ *	NOSYNC: the calls are queued and we return without waiting for
+ *	        confirmation that they have completed.
+ *
+ * The action function may be NULL.
+ * The cpu mask may include the local cpu. Offline cpus are ignored.
+ * The return value is the number of cpus on which the call was made or queued.
+ *
+ * This is a thin front end to mp_cpus_call1(): the one-argument callback is
+ * cast to the two-argument form and invoked with arg as its first argument
+ * and NULL as its second.
+ */
+cpu_t
+mp_cpus_call(
+	cpumask_t cpus,
+	mp_sync_t mode,
+	void (*action_func)(void *),
+	void *arg)
+{
+	void (*two_arg_func)(void *, void *) = (void (*)(void *, void *))action_func;
+
+	return mp_cpus_call1(cpus, mode, two_arg_func, arg, NULL, NULL);
+}
+
+/*
+ * Spin until the mask at cpus_responded equals cpus_called.
+ *
+ * intrs_enabled is the interrupt state the caller wants assumed: when it is
+ * FALSE, each pass services this cpu's own call queue (if non-empty) and
+ * calls cpu_signal_handler(NULL) by hand -- presumably so that cross-calls
+ * aimed at this cpu still make progress while it spins; confirm.
+ * If mp_spin_timeout() reports that the spin has lasted too long, the cpus
+ * that have not responded are sent an NMI via NMIPI_panic() and this cpu
+ * panics.
+ */
+static void
+mp_cpus_call_wait(boolean_t intrs_enabled,
+ cpumask_t cpus_called,
+ cpumask_t *cpus_responded)
+{
+ mp_call_queue_t *cqp;
+ uint64_t tsc_spin_start;
+
+ /* Caller must have interrupts off or preemption disabled */
+ assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+ cqp = &mp_cpus_call_head[cpu_number()];
+
+ tsc_spin_start = rdtsc64();
+ while (*cpus_responded != cpus_called) {
+ if (!intrs_enabled) {
+ /* Sniffing w/o locking */
+ if (!queue_empty(&cqp->queue)) {
+ mp_cpus_call_action();
+ }
+ cpu_signal_handler(NULL);
+ }
+ if (mp_spin_timeout(tsc_spin_start)) {
+ cpumask_t cpus_unresponsive;
+
+ /* Called cpus whose response bit is still clear */
+ cpus_unresponsive = cpus_called & ~(*cpus_responded);
+ NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
+ panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
+ cpus_unresponsive);
+ }
+ }
+}
+
+/*
+ * mp_cpus_call1() is the two-argument worker behind mp_cpus_call().
+ *
+ *	cpus          mask of cpus to call; cpus that are not running are skipped
+ *	mode          SYNC, ASYNC or NOSYNC (see mp_cpus_call())
+ *	action_func   routine to run on each target cpu; may be NULL
+ *	arg0, arg1    passed through to action_func
+ *	cpus_calledp  if non-NULL, receives the mask of cpus on which the call
+ *	              was made or queued, including the local cpu if it was
+ *	              among the targets
+ *
+ * Returns the number of cpus on which the call was made or queued.
+ *
+ * Before smp_initialized is set, only the local cpu can be called and the
+ * function is run directly with interrupts disabled.
+ */
+cpu_t
+mp_cpus_call1(
+	cpumask_t cpus,
+	mp_sync_t mode,
+	void (*action_func)(void *, void *),
+	void *arg0,
+	void *arg1,
+	cpumask_t *cpus_calledp)
+{
+	cpu_t cpu = 0;
+	boolean_t intrs_enabled = FALSE;
+	boolean_t call_self = FALSE;
+	cpumask_t self_mask = 0;        /* local cpu's bit, valid when call_self */
+	cpumask_t cpus_called = 0;
+	cpumask_t cpus_responded = 0;
+	long cpus_call_count = 0;
+	uint64_t tsc_spin_start;
+	boolean_t topo_lock;
+
+	KERNEL_DEBUG_CONSTANT(
+		TRACE_MP_CPUS_CALL | DBG_FUNC_START,
+		cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
+
+	if (!smp_initialized) {
+		if ((cpus & CPUMASK_SELF) == 0) {
+			goto out;
+		}
+		if (action_func != NULL) {
+			intrs_enabled = ml_set_interrupts_enabled(FALSE);
+			action_func(arg0, arg1);
+			ml_set_interrupts_enabled(intrs_enabled);
+		}
+		call_self = TRUE;
+		/* Report the same bit that was tested above */
+		self_mask = CPUMASK_SELF;
+		goto out;
+	}
+
+	/*
+	 * Queue the call for each non-local requested cpu.
+	 * This is performed under the topo lock to prevent changes to
+	 * cpus online state and to prevent concurrent rendezvouses --
+	 * although an exception is made if we're calling only the master
+	 * processor since that always remains active. Note: this exception
+	 * is expected for longterm timer nosync cross-calls to the master cpu.
+	 */
+	mp_disable_preemption();
+	intrs_enabled = ml_get_interrupts_enabled();
+	topo_lock = (cpus != cpu_to_cpumask(master_cpu));
+	if (topo_lock) {
+		ml_set_interrupts_enabled(FALSE);
+		(void) mp_safe_spin_lock(&x86_topo_lock);
+	}
+	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
+		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
+		    !cpu_is_running(cpu)) {
+			continue;
+		}
+		tsc_spin_start = rdtsc64();
+		if (cpu == (cpu_t) cpu_number()) {
+			/*
+			 * We don't IPI ourself and if calling asynchronously,
+			 * we defer our call until we have signalled all others.
+			 */
+			call_self = TRUE;
+			/*
+			 * Remember our own bit now: once the loop finishes,
+			 * cpu no longer identifies the local cpu.
+			 */
+			self_mask = cpu_to_cpumask(cpu);
+			if (mode == SYNC && action_func != NULL) {
+				KERNEL_DEBUG_CONSTANT(
+					TRACE_MP_CPUS_CALL_LOCAL,
+					VM_KERNEL_UNSLIDE(action_func),
+					VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
+				action_func(arg0, arg1);
+			}
+		} else {
+			/*
+			 * Here to queue a call to cpu and IPI.
+			 */
+			mp_call_t *callp = NULL;
+			mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
+			boolean_t intrs_inner;
+
+queue_call:
+			if (callp == NULL) {
+				callp = mp_call_alloc();
+			}
+			intrs_inner = mp_call_head_lock(cqp);
+			if (callp == NULL) {
+				/*
+				 * No free buffer: drop the lock, service our own
+				 * pending work if interrupts were already off,
+				 * and retry until a buffer appears or we time out.
+				 */
+				mp_call_head_unlock(cqp, intrs_inner);
+				KERNEL_DEBUG_CONSTANT(
+					TRACE_MP_CPUS_CALL_NOBUF,
+					cpu, 0, 0, 0, 0);
+				if (!intrs_inner) {
+					/* Sniffing w/o locking */
+					if (!queue_empty(&cqp->queue)) {
+						mp_cpus_call_action();
+					}
+					handle_pending_TLB_flushes();
+				}
+				if (mp_spin_timeout(tsc_spin_start)) {
+					panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
+					    tsc_spin_start, rdtsc64());
+				}
+				goto queue_call;
+			}
+			/* NOSYNC callers do not wait, so no response mask is needed */
+			callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
+			callp->func = action_func;
+			callp->arg0 = arg0;
+			callp->arg1 = arg1;
+			mp_call_enqueue_locked(cqp, callp);
+			cpus_call_count++;
+			cpus_called |= cpu_to_cpumask(cpu);
+			i386_signal_cpu(cpu, MP_CALL, ASYNC);
+			mp_call_head_unlock(cqp, intrs_inner);
+			if (mode == SYNC) {
+				mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
+			}
+		}
+	}
+	if (topo_lock) {
+		simple_unlock(&x86_topo_lock);
+		ml_set_interrupts_enabled(intrs_enabled);
+	}
+
+	/* Call locally if mode not SYNC */
+	if (mode != SYNC && call_self) {
+		KERNEL_DEBUG_CONSTANT(
+			TRACE_MP_CPUS_CALL_LOCAL,
+			VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
+		if (action_func != NULL) {
+			ml_set_interrupts_enabled(FALSE);
+			action_func(arg0, arg1);
+			ml_set_interrupts_enabled(intrs_enabled);
+		}
+	}
+
+	/* For ASYNC, now wait for all signaled cpus to complete their calls */
+	if (mode == ASYNC) {
+		mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
+	}
+
+	/* Safe to allow pre-emption now */
+	mp_enable_preemption();
+
+out:
+	if (call_self) {
+		/*
+		 * Use the bit recorded when the local cpu was identified;
+		 * on the SMP path cpu is the exhausted loop index here.
+		 */
+		cpus_called |= self_mask;
+		cpus_call_count++;
+	}
+
+	if (cpus_calledp) {
+		*cpus_calledp = cpus_called;
+	}
+
+	KERNEL_DEBUG_CONSTANT(
+		TRACE_MP_CPUS_CALL | DBG_FUNC_END,
+		cpus_call_count, cpus_called, 0, 0, 0);
+
+	return (cpu_t) cpus_call_count;
+}
+
+
+/*
+ * Per-cpu half of mp_broadcast(): runs the registered broadcast routine (if
+ * one is set) with its argument, then decrements mp_bc_count. The cpu whose
+ * decrement is reported by atomic_decl_and_test() as the final one wakes
+ * the thread waiting on mp_bc_count. The parameter is unused.
+ */
+static void
+mp_broadcast_action(__unused void *null)
+{
+	/* Run the broadcast routine, when there is one */
+	if (mp_bc_action_func != NULL) {
+		mp_bc_action_func(mp_bc_func_arg);
+	}
+
+	/* Everyone but the last cpu through is finished at this point */
+	if (!atomic_decl_and_test(&mp_bc_count, 1)) {
+		return;
+	}
+
+	/* Last one through: wake up the instigator */
+	thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
+}
+
+/*
+ * mp_broadcast() runs a given function on all active cpus.
+ * The caller blocks until the function has run on all cpus.
+ * The caller will also block if there is another pending broadcast.
+ *
+ * Before smp_initialized is set, action_func (if non-NULL) is simply called
+ * on the local cpu. Otherwise broadcasts are serialized by mp_bc_lock, and
+ * the function and argument are published through mp_bc_action_func and
+ * mp_bc_func_arg for mp_broadcast_action() to pick up on each cpu.
+ */
+void
+mp_broadcast(
+ void (*action_func)(void *),
+ void *arg)
+{
+ if (!smp_initialized) {
+ if (action_func != NULL) {
+ action_func(arg);
+ }
+ return;
+ }
+
+ /* obtain broadcast lock */
+ lck_mtx_lock(&mp_bc_lock);
+
+ /* set static function pointers */
+ mp_bc_action_func = action_func;
+ mp_bc_func_arg = arg;
+
+ /* Register the wait before any cpu is signalled, so a wakeup cannot be lost */
+ assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
+
+ /*
+ * signal other processors, which will call mp_broadcast_action()
+ */
+ mp_bc_count = real_ncpus; /* assume max possible active */
+ mp_bc_ncpus = mp_cpus_call(CPUMASK_ALL, NOSYNC, *mp_broadcast_action, NULL);
+ /*
+ * NOTE(review): if every called cpu has already run its action by now
+ * and some cpus were inactive, this subtraction appears to be what brings
+ * the count to zero, and it issues no thread_wakeup() -- confirm that
+ * the thread_block() below cannot be left waiting in that case.
+ */
+ atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus); /* subtract inactive */
+
+ /* block for other cpus to have run action_func */
+ if (mp_bc_ncpus > 1) {
+ thread_block(THREAD_CONTINUE_NULL);
+ } else {
+ /* No cpu other than this one was called: cancel the wait set up above */
+ clear_wait(current_thread(), THREAD_AWAKENED);
+ }
+
+ /* release lock */
+ lck_mtx_unlock(&mp_bc_lock);
+}
+
+/*
+ * Send the KICK IPI to every running cpu selected by the cpus mask.
+ * Runs with interrupts disabled and x86_topo_lock held for the duration of
+ * the scan; the caller's interrupt state is restored on return.
+ */
+void
+mp_cpus_kick(cpumask_t cpus)
+{
+	cpu_t		target;
+	boolean_t	saved_intrs = ml_set_interrupts_enabled(FALSE);
+
+	mp_safe_spin_lock(&x86_topo_lock);
+
+	for (target = 0; target < (cpu_t) real_ncpus; target++) {
+		/* Only cpus that are both requested and running get the IPI */
+		if ((cpu_to_cpumask(target) & cpus) != 0 &&
+		    cpu_is_running(target)) {
+			lapic_send_ipi(target, LAPIC_VECTOR(KICK));
+		}
+	}
+
+	simple_unlock(&x86_topo_lock);
+	ml_set_interrupts_enabled(saved_intrs);
+}
+
+/*
+ * Mark the calling cpu as running. Must be entered with interrupts
+ * disabled (asserted).
+ *
+ * Before smp_initialized is set only the flag is written. Afterwards the
+ * flag is set under x86_topo_lock, followed by started_cpu() and a
+ * pmap_tlbi_range() call over the range [0, ~0ULL).
+ */
+void
+i386_activate_cpu(void)
+{
+	cpu_data_t *cdp = current_cpu_datap();
+
+	assert(!ml_get_interrupts_enabled());
+
+	if (smp_initialized) {
+		mp_safe_spin_lock(&x86_topo_lock);
+		cdp->cpu_running = TRUE;
+		started_cpu();
+		pmap_tlbi_range(0, ~0ULL, true, 0);
+		simple_unlock(&x86_topo_lock);
+	} else {
+		cdp->cpu_running = TRUE;
+	}
+}
+
+/*
+ * Take the calling cpu out of service: clear its cpu_running flag under
+ * x86_topo_lock, hand its timers to the master cpu, then briefly enable
+ * interrupts so outstanding signals/timers can be serviced before
+ * interrupts are turned off again. Must be entered with interrupts
+ * disabled (asserted); returns with interrupts disabled.
+ */
+void
+i386_deactivate_cpu(void)
+{
+ cpu_data_t *cdp = current_cpu_datap();
+
+ assert(!ml_get_interrupts_enabled());
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
+ 0, 0, 0, 0, 0);
+
+ /* From here on, cross-call and kick scans skip this cpu */
+ mp_safe_spin_lock(&x86_topo_lock);
+ cdp->cpu_running = FALSE;
+ simple_unlock(&x86_topo_lock);
+
+ /*
+ * Move all of this cpu's timers to the master/boot cpu,
+ * and poke it in case there's a sooner deadline for it to schedule.
+ */
+ timer_queue_shutdown(&cdp->rtclock_timer.queue);
+ mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
+
+#if MONOTONIC
+ mt_cpu_down(cdp);
+#endif /* MONOTONIC */
+
+ /*
+ * Open an interrupt window
+ * and ensure any pending IPI or timer is serviced
+ */
+ mp_disable_preemption();
+ ml_set_interrupts_enabled(TRUE);
+
+ /*
+ * NOTE(review): with "&&" this loop stops as soon as EITHER no signal is
+ * pending OR no timer deadline is set, whereas the comment above speaks of
+ * servicing any pending IPI or timer -- confirm "&&" is intended.
+ */
+ while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime) {
+ cpu_pause();
+ }
+ /*
+ * Ensure there's no remaining timer deadline set
+ * - AICPM may have left one active.
+ */
+ setPop(0);
+
+ ml_set_interrupts_enabled(FALSE);
+ mp_enable_preemption();
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
+ 0, 0, 0, 0, 0);
+}
+
+/* NOTE(review): presumably a debug switch, on (1) by default; its users are not in this section -- confirm */
+int pmsafe_debug = 1;
+
+#if MACH_KDP
+/*
+ * State shared between cpus for kernel-debugger (KDP) entry.
+ * NOTE(review): exact meaning of each field should be confirmed against
+ * mp_kdp_enter() and its counterparts.
+ */
+volatile boolean_t mp_kdp_trap = FALSE;
+volatile boolean_t mp_kdp_is_NMI = FALSE;
+volatile unsigned long mp_kdp_ncpus;
+boolean_t mp_kdp_state;
+
+
+void
+mp_kdp_enter(boolean_t proceed_on_failure)
+{
+ unsigned int cpu;
+ unsigned int ncpus = 0;
+ unsigned int my_cpu;
+ uint64_t tsc_timeout;