X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/bd504ef0e0b883cdd7917b73b3574eb9ce669905..813fb2f63a553c957e917ede5f119b021d6ce391:/osfmk/i386/mp.c

diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c
index 286b822aa..269ca1914 100644
--- a/osfmk/i386/mp.c
+++ b/osfmk/i386/mp.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -30,6 +31,7 @@
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -46,15 +48,17 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -79,6 +83,8 @@
 #include 
+#include 
+
 #if	MP_DEBUG
 #define PAUSE		delay(1000000)
 #define DBG(x...)	kprintf(x)
@@ -102,11 +108,14 @@ void		slave_boot_init(void);
 void		i386_cpu_IPI(int cpu);
 
+#if MACH_KDP
 static void	mp_kdp_wait(boolean_t flush, boolean_t isNMI);
-static void	mp_rendezvous_action(void);
-static void	mp_broadcast_action(void);
+#endif /* MACH_KDP */
+static boolean_t	mp_safe_spin_lock(usimple_lock_t lock);
+#if MACH_KDP
 static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
+#endif /* MACH_KDP */
 static int		NMIInterruptHandler(x86_saved_state_t *regs);
 
 boolean_t		smp_initialized = FALSE;
@@ -115,6 +124,9 @@ volatile boolean_t	force_immediate_debugger_NMI = FALSE;
 volatile boolean_t	pmap_tlb_flush_timeout = FALSE;
 decl_simple_lock_data(,mp_kdp_lock);
 
+decl_simple_lock_data(,debugger_callback_lock);
+struct debugger_callback *debugger_callback = NULL;
+
 decl_lck_mtx_data(static, mp_cpu_boot_lock);
 lck_mtx_ext_t	mp_cpu_boot_lock_ext;
 
@@ -154,13 +166,17 @@ static volatile long	mp_bc_count;
 decl_lck_mtx_data(static, mp_bc_lock);
 lck_mtx_ext_t	mp_bc_lock_ext;
 static	volatile int	debugger_cpu = -1;
-volatile long	NMIPI_acks = 0;
+volatile long	NMIPI_acks = 0;
+volatile long	NMI_count = 0;
+
+extern void	NMI_cpus(void);
 
 static void	mp_cpus_call_init(void);
-static void	mp_cpus_call_cpu_init(void);
 static void	mp_cpus_call_action(void);
 static void	mp_call_PM(void);
 
+static boolean_t	mp_cpus_call_wait_timeout = FALSE;
+
 char		mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
 
 /* PAL-related routines */
@@ -213,6 +229,7 @@ smp_init(void)
 {
 	simple_lock_init(&mp_kdp_lock, 0);
 	simple_lock_init(&mp_rv_lock, 0);
+	simple_lock_init(&debugger_callback_lock, 0);
 	lck_grp_attr_setdefault(&smp_lck_grp_attr);
 	lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
 	lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
@@ -229,7 +246,7 @@ smp_init(void)
 	DBGLOG_CPU_INIT(master_cpu);
 
 	mp_cpus_call_init();
-	mp_cpus_call_cpu_init();
+	mp_cpus_call_cpu_init(master_cpu);
 
 	if (PE_parse_boot_argn("TSC_sync_margin",
 					&TSC_sync_margin, sizeof(TSC_sync_margin))) {
@@ -427,10 +444,7 @@ intel_startCPU(
 	 * Initialize (or re-initialize) the descriptor tables for this cpu.
 	 * Propagate processor mode to slave.
 	 */
-	if (cpu_mode_is64bit())
-		cpu_desc_init64(cpu_datap(slot_num));
-	else
-		cpu_desc_init(cpu_datap(slot_num));
+	cpu_desc_init64(cpu_datap(slot_num));
 
 	/* Serialize use of the slave boot stack, etc. */
 	lck_mtx_lock(&mp_cpu_boot_lock);
@@ -480,9 +494,15 @@ MP_EVENT_NAME_DECL();
 
 #endif	/* MP_DEBUG */
 
+/*
+ * Note: called with NULL state when polling for TLB flush and cross-calls.
+ */
 int
 cpu_signal_handler(x86_saved_state_t *regs)
 {
+#if	!MACH_KDP
+#pragma unused (regs)
+#endif /* !MACH_KDP */
 	int		my_cpu;
 	volatile int	*my_word;
@@ -519,18 +539,6 @@ cpu_signal_handler(x86_saved_state_t *regs)
 			DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
 			i_bit_clear(MP_TLB_FLUSH, my_word);
 			pmap_update_interrupt();
-		} else if (i_bit(MP_AST, my_word)) {
-			DBGLOG(cpu_handle,my_cpu,MP_AST);
-			i_bit_clear(MP_AST, my_word);
-			ast_check(cpu_to_processor(my_cpu));
-		} else if (i_bit(MP_RENDEZVOUS, my_word)) {
-			DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
-			i_bit_clear(MP_RENDEZVOUS, my_word);
-			mp_rendezvous_action();
-		} else if (i_bit(MP_BROADCAST, my_word)) {
-			DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
-			i_bit_clear(MP_BROADCAST, my_word);
-			mp_broadcast_action();
 		} else if (i_bit(MP_CHUD, my_word)) {
 			DBGLOG(cpu_handle,my_cpu,MP_CHUD);
 			i_bit_clear(MP_CHUD, my_word);
@@ -544,15 +552,24 @@ cpu_signal_handler(x86_saved_state_t *regs)
 			i_bit_clear(MP_CALL_PM, my_word);
 			mp_call_PM();
 		}
+		if (regs == NULL) {
+			/* Called to poll only for cross-calls and TLB flush */
+			break;
+		} else if (i_bit(MP_AST, my_word)) {
+			DBGLOG(cpu_handle,my_cpu,MP_AST);
+			i_bit_clear(MP_AST, my_word);
+			ast_check(cpu_to_processor(my_cpu));
+		}
 	} while (*my_word);
 
 	return 0;
 }
 
+extern void kprintf_break_lock(void);
 static int
 NMIInterruptHandler(x86_saved_state_t *regs)
 {
-	void 	*stackptr;
+	void	*stackptr;
 
 	if (panic_active() && !panicDebugging) {
 		if (pmsafe_debug)
@@ -562,31 +579,58 @@ NMIInterruptHandler(x86_saved_state_t *regs)
 	}
 
 	atomic_incl(&NMIPI_acks, 1);
+	atomic_incl(&NMI_count, 1);
 	sync_iss_to_iks_unconditionally(regs);
-#if defined (__i386__)
-	__asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
-#elif defined (__x86_64__)
 	__asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
-#endif
 
 	if (cpu_number() == debugger_cpu)
-			goto NMExit;
+		goto NMExit;
 
 	if (spinlock_timed_out) {
 		char pstr[192];
 		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
 		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
+	} else if (mp_cpus_call_wait_timeout) {
+		char pstr[192];
+		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, this CPU timed-out during cross-call\n", cpu_number());
+		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
 	} else if (pmap_tlb_flush_timeout == TRUE) {
 		char pstr[128];
 		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
 		panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
-	} 
+	}
 
 #if MACH_KDP
 	if (pmsafe_debug && !kdp_snapshot)
 		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
 	current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
-	mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
+	i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
+	if (pmap_tlb_flush_timeout ||
+	    spinlock_timed_out ||
+	    mp_cpus_call_wait_timeout ||
+	    panic_active()) {
+		mp_kdp_wait(FALSE, TRUE);
+	} else if (!mp_kdp_trap &&
+		   !mp_kdp_is_NMI &&
+		   virtualized && (debug_boot_arg & DB_NMI)) {
+		/*
+		 * Under a VMM with the debug boot-arg set, drop into kdp.
+		 * Since an NMI is involved, there's a risk of contending with
+		 * a panic.  And side-effects of NMIs may result in entry into,
+		 * and continuing from, the debugger being unreliable.
+		 */
+		if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
+			kprintf_break_lock();
+			kprintf("Debugger entry requested by NMI\n");
+			kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
+			printf("Debugger entry requested by NMI\n");
+			mp_kdp_is_NMI = FALSE;
+		} else {
+			mp_kdp_wait(FALSE, FALSE);
+		}
+	} else {
+		mp_kdp_wait(FALSE, FALSE);
+	}
 	if (pmsafe_debug && !kdp_snapshot)
 		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
 #endif
@@ -625,6 +669,35 @@ cpu_NMI_interrupt(int cpu)
 	}
 }
 
+void
+NMI_cpus(void)
+{
+	unsigned int	cpu;
+	boolean_t	intrs_enabled;
+	uint64_t	tsc_timeout;
+
+	intrs_enabled = ml_set_interrupts_enabled(FALSE);
+
+	for (cpu = 0; cpu < real_ncpus; cpu++) {
+		if (!cpu_datap(cpu)->cpu_running)
+			continue;
+		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
+		cpu_NMI_interrupt(cpu);
+		tsc_timeout = !machine_timeout_suspended() ?
+				rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
+				~0ULL;
+		while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
+			handle_pending_TLB_flushes();
+			cpu_pause();
+			if (rdtsc64() > tsc_timeout)
+				panic("NMI_cpus() timeout cpu %d", cpu);
+		}
+		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
+	}
+
+	ml_set_interrupts_enabled(intrs_enabled);
+}
+
 static void	(* volatile mp_PM_func)(void) = NULL;
 
 static void
@@ -674,7 +747,9 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
 	i386_cpu_IPI(cpu);
 	if (mode == SYNC) {
    again:
-		tsc_timeout = rdtsc64() + (1000*1000*1000);
+		tsc_timeout = !machine_timeout_suspended() ?
+					rdtsc64() + (1000*1000*1000) :
+					~0ULL;
 		while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
 			cpu_pause();
 		}
@@ -689,41 +764,58 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
 }
 
 /*
- * Send event to all running cpus.
- * Called with the topology locked.
+ * Helper function called when busy-waiting: panic if too long
+ * a TSC-based time has elapsed since the start of the spin.
  */
-void
-i386_signal_cpus(mp_event_t event, mp_sync_t mode)
+static boolean_t
+mp_spin_timeout(uint64_t tsc_start)
 {
-	unsigned int	cpu;
-	unsigned int	my_cpu = cpu_number();
+	uint64_t	tsc_timeout;
 
-	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
+	cpu_pause();
+	if (machine_timeout_suspended())
+		return FALSE;
 
-	for (cpu = 0; cpu < real_ncpus; cpu++) {
-		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
-			continue;
-		i386_signal_cpu(cpu, event, mode);
-	}
+	/*
+	 * The timeout is 4 * the spinlock timeout period
+	 * unless we have serial console printing (kprintf) enabled
+	 * in which case we allow an even greater margin.
+	 */
+	tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2 : LockTimeOutTSC << 4;
+	return (rdtsc64() > tsc_start + tsc_timeout);
 }
 
 /*
- * Return the number of running cpus.
- * Called with the topology locked.
+ * Helper function to take a spinlock while ensuring that incoming IPIs
+ * are still serviced if interrupts are masked while we spin.
+ * Returns current interrupt state.
  */
-int
-i386_active_cpus(void)
+static boolean_t
+mp_safe_spin_lock(usimple_lock_t lock)
 {
-	unsigned int	cpu;
-	unsigned int	ncpus = 0;
-
-	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
-
-	for (cpu = 0; cpu < real_ncpus; cpu++) {
-		if (cpu_datap(cpu)->cpu_running)
-			ncpus++;
-	}
-	return(ncpus);
+	if (ml_get_interrupts_enabled()) {
+		simple_lock(lock);
+		return TRUE;
+	} else {
+		uint64_t tsc_spin_start = rdtsc64();
+		while (!simple_lock_try(lock)) {
+			cpu_signal_handler(NULL);
+			if (mp_spin_timeout(tsc_spin_start)) {
+				uint32_t lock_cpu;
+				uintptr_t lowner = (uintptr_t)
+						   lock->interlock.lock_data;
+				spinlock_timed_out = lock;
+				lock_cpu = spinlock_timeout_NMI(lowner);
+				panic("mp_safe_spin_lock() timed out,"
+				      " lock: %p, owner thread: 0x%lx,"
+				      " current_thread: %p, owner on CPU 0x%x",
+				      lock, lowner,
+				      current_thread(), lock_cpu);
+			}
+		}
+		return FALSE;
+	}
 }
 
 /*
@@ -741,9 +833,10 @@ i386_active_cpus(void)
  */
 static void
-mp_rendezvous_action(void)
+mp_rendezvous_action(__unused void *null)
 {
-	boolean_t intrs_enabled;
+	boolean_t	intrs_enabled;
+	uint64_t	tsc_spin_start;
 
 	/* setup function */
 	if (mp_rv_setup_func != NULL)
@@ -753,11 +846,15 @@ mp_rendezvous_action(void)
 	/* spin on entry rendezvous */
 	atomic_incl(&mp_rv_entry, 1);
+	tsc_spin_start = rdtsc64();
+
 	while (mp_rv_entry < mp_rv_ncpus) {
 		/* poll for pesky tlb flushes if interrupts disabled */
 		if (!intrs_enabled)
 			handle_pending_TLB_flushes();
-		cpu_pause();
+		if (mp_spin_timeout(tsc_spin_start)) {
+			panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
+		}
 	}
 
 	/* action function */
@@ -766,10 +863,12 @@ mp_rendezvous_action(void)
 
 	/* spin on exit rendezvous */
 	atomic_incl(&mp_rv_exit, 1);
+	tsc_spin_start = rdtsc64();
 	while (mp_rv_exit < mp_rv_ncpus) {
 		if (!intrs_enabled)
 			handle_pending_TLB_flushes();
-		cpu_pause();
+		if (mp_spin_timeout(tsc_spin_start))
+			panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
 	}
 
 	/* teardown function */
@@ -786,6 +885,7 @@ mp_rendezvous(void (*setup_func)(void *),
 	      void (*teardown_func)(void *),
 	      void *arg)
 {
+	uint64_t	tsc_spin_start;
 
 	if (!smp_initialized) {
 		if (setup_func != NULL)
@@ -798,7 +898,7 @@ mp_rendezvous(void (*setup_func)(void *),
 	}
 
 	/* obtain rendezvous lock */
-	simple_lock(&mp_rv_lock);
+	(void) mp_safe_spin_lock(&mp_rv_lock);
 
 	/* set static function pointers */
 	mp_rv_setup_func = setup_func;
@@ -814,21 +914,20 @@ mp_rendezvous(void (*setup_func)(void *),
 	 * signal other processors, which will call mp_rendezvous_action()
 	 * with interrupts disabled
 	 */
-	simple_lock(&x86_topo_lock);
-	mp_rv_ncpus = i386_active_cpus();
-	i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
-	simple_unlock(&x86_topo_lock);
+	mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;
 
 	/* call executor function on this cpu */
-	mp_rendezvous_action();
+	mp_rendezvous_action(NULL);
 
 	/*
 	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
 	 */
+	tsc_spin_start = rdtsc64();
 	while (mp_rv_complete < mp_rv_ncpus) {
-		cpu_pause();
+		if (mp_spin_timeout(tsc_spin_start))
+			panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
 	}
 
 	/* Tidy up */
@@ -886,7 +985,7 @@ typedef struct {
 	void		(*func)(void *,void *);	/* routine to call */
 	void		*arg0;			/* routine's 1st arg */
 	void		*arg1;			/* routine's 2nd arg */
-	volatile long	*countp;		/* completion counter */
+	cpumask_t	*maskp;			/* completion response mask */
 } mp_call_t;
 
 
@@ -909,12 +1008,28 @@ mp_call_head_lock(mp_call_queue_t *cqp)
 	return intrs_enabled;
 }
 
+void
+mp_cpus_NMIPI(cpumask_t cpu_mask) {
+	unsigned int cpu, cpu_bit;
+	uint64_t deadline;
+
+	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+		if (cpu_mask & cpu_bit)
+			cpu_NMI_interrupt(cpu);
+	}
+	deadline = mach_absolute_time() + (LockTimeOut);
+	while (mach_absolute_time() < deadline)
+		cpu_pause();
+}
+
+#if MACH_ASSERT
 static inline boolean_t
 mp_call_head_is_locked(mp_call_queue_t *cqp)
 {
 	return !ml_get_interrupts_enabled() &&
 		hw_lock_held((hw_lock_t)&cqp->lock);
 }
+#endif
 
 static inline void
 mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
@@ -980,20 +1095,16 @@ mp_cpus_call_init(void)
 }
 
 /*
- * Called by each processor to add call buffers to the free list
+ * Called at processor registration to add call buffers to the free list
  * and to initialize the per-cpu call queue.
- * Also called but ignored on slave processors on re-start/wake.
  */
-static void
-mp_cpus_call_cpu_init(void)
+void
+mp_cpus_call_cpu_init(int cpu)
 {
 	int		i;
-	mp_call_queue_t	*cqp = &mp_cpus_call_head[cpu_number()];
+	mp_call_queue_t	*cqp = &mp_cpus_call_head[cpu];
 	mp_call_t	*callp;
 
-	if (cqp->queue.next != NULL)
-		return; /* restart/wake case: called already */
-
 	simple_lock_init(&cqp->lock, 0);
 	queue_init(&cqp->queue);
 	for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
@@ -1001,7 +1112,7 @@ mp_cpus_call_cpu_init(void)
 		mp_call_free(callp);
 	}
 
-	DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
+	DBG("mp_cpus_call_init(%d) done\n", cpu);
 }
 
 /*
@@ -1027,12 +1138,13 @@ mp_cpus_call_action(void)
 			mp_call_head_unlock(cqp, intrs_enabled);
 			KERNEL_DEBUG_CONSTANT(
 				TRACE_MP_CPUS_CALL_ACTION,
-				call.func, call.arg0, call.arg1, call.countp, 0);
+				VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
+				VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
 			call.func(call.arg0, call.arg1);
 			(void) mp_call_head_lock(cqp);
 		}
-		if (call.countp != NULL)
-			atomic_incl(call.countp, 1);
+		if (call.maskp != NULL)
+			i_bit_set(cpu_number(), call.maskp);
 	}
 	mp_call_head_unlock(cqp, intrs_enabled);
 }
@@ -1063,27 +1175,37 @@ mp_cpus_call(
 		       (void (*)(void *,void *))action_func,
 		       arg,
 		       NULL,
-		       NULL,
 		       NULL);
 }
 
 static void
 mp_cpus_call_wait(boolean_t	intrs_enabled,
-		  long		mp_cpus_signals,
-		  volatile long	*mp_cpus_calls)
+		  cpumask_t	cpus_called,
+		  cpumask_t	*cpus_responded)
{
 	mp_call_queue_t		*cqp;
+	uint64_t		tsc_spin_start;
 
+	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
 	cqp = &mp_cpus_call_head[cpu_number()];
 
-	while (*mp_cpus_calls < mp_cpus_signals) {
+	tsc_spin_start = rdtsc64();
+	while (*cpus_responded != cpus_called) {
 		if (!intrs_enabled) {
 			/* Sniffing w/o locking */
 			if (!queue_empty(&cqp->queue))
 				mp_cpus_call_action();
-			handle_pending_TLB_flushes();
+			cpu_signal_handler(NULL);
+		}
+		if (mp_spin_timeout(tsc_spin_start)) {
+			cpumask_t	cpus_unresponsive;
+
+			mp_cpus_call_wait_timeout = TRUE;
+			cpus_unresponsive = cpus_called & ~(*cpus_responded);
+			mp_cpus_NMIPI(cpus_unresponsive);
+			panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
+				cpus_unresponsive);
 		}
-		cpu_pause();
 	}
 }
 
@@ -1094,20 +1216,20 @@ mp_cpus_call1(
 	void		(*action_func)(void *, void *),
 	void		*arg0,
 	void		*arg1,
-	cpumask_t	*cpus_calledp,
-	cpumask_t	*cpus_notcalledp)
+	cpumask_t	*cpus_calledp)
 {
-	cpu_t		cpu;
+	cpu_t		cpu = 0;
 	boolean_t	intrs_enabled = FALSE;
 	boolean_t	call_self = FALSE;
 	cpumask_t	cpus_called = 0;
-	cpumask_t	cpus_notcalled = 0;
-	long		mp_cpus_signals = 0;
-	volatile long	mp_cpus_calls = 0;
+	cpumask_t	cpus_responded = 0;
+	long		cpus_call_count = 0;
+	uint64_t	tsc_spin_start;
+	boolean_t	topo_lock;
 
 	KERNEL_DEBUG_CONSTANT(
 		TRACE_MP_CPUS_CALL | DBG_FUNC_START,
-		cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
+		cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
 
 	if (!smp_initialized) {
 		if ((cpus & CPUMASK_SELF) == 0)
@@ -1123,90 +1245,89 @@ mp_cpus_call1(
 
 	/*
 	 * Queue the call for each non-local requested cpu.
-	 * The topo lock is not taken. Instead we sniff the cpu_running state
-	 * and then re-check it after taking the call lock. A cpu being taken
-	 * offline runs the action function after clearing the cpu_running.
+	 * This is performed under the topo lock to prevent changes to
+	 * cpus online state and to prevent concurrent rendezvouses --
+	 * although an exception is made if we're calling only the master
+	 * processor since that always remains active. Note: this exception
+	 * is expected for longterm timer nosync cross-calls to the master cpu.
 	 */
+	mp_disable_preemption();
+	intrs_enabled = ml_get_interrupts_enabled();
+	topo_lock = (cpus != cpu_to_cpumask(master_cpu));
+	if (topo_lock) {
+		ml_set_interrupts_enabled(FALSE);
+		(void) mp_safe_spin_lock(&x86_topo_lock);
+	}
 	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
 		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
 		    !cpu_datap(cpu)->cpu_running)
 			continue;
+		tsc_spin_start = rdtsc64();
 		if (cpu == (cpu_t) cpu_number()) {
 			/*
 			 * We don't IPI ourself and if calling asynchronously,
 			 * we defer our call until we have signalled all others.
 			 */
 			call_self = TRUE;
-			cpus_called |= cpu_to_cpumask(cpu);
 			if (mode == SYNC && action_func != NULL) {
 				KERNEL_DEBUG_CONSTANT(
 					TRACE_MP_CPUS_CALL_LOCAL,
 					VM_KERNEL_UNSLIDE(action_func),
-					arg0, arg1, 0, 0);
+					VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
 				action_func(arg0, arg1);
 			}
 		} else {
 			/*
 			 * Here to queue a call to cpu and IPI.
-			 * Spinning for request buffer unless NOSYNC.
 			 */
 			mp_call_t	*callp = NULL;
 			mp_call_queue_t	*cqp = &mp_cpus_call_head[cpu];
+			boolean_t	intrs_inner;
 
 	queue_call:
 			if (callp == NULL)
 				callp = mp_call_alloc();
-			intrs_enabled = mp_call_head_lock(cqp);
-			if (!cpu_datap(cpu)->cpu_running) {
-				mp_call_head_unlock(cqp, intrs_enabled);
-				continue;
-			}
-			if (mode == NOSYNC) {
-				if (callp == NULL) {
-					cpus_notcalled |= cpu_to_cpumask(cpu);
-					mp_call_head_unlock(cqp, intrs_enabled);
-					KERNEL_DEBUG_CONSTANT(
-						TRACE_MP_CPUS_CALL_NOBUF,
-						cpu, 0, 0, 0, 0);
-					continue;
-				}
-				callp->countp = NULL;
-			} else {
-				if (callp == NULL) {
-					mp_call_head_unlock(cqp, intrs_enabled);
-					KERNEL_DEBUG_CONSTANT(
-						TRACE_MP_CPUS_CALL_NOBUF,
-						cpu, 0, 0, 0, 0);
-					if (!intrs_enabled) {
-						/* Sniffing w/o locking */
-						if (!queue_empty(&cqp->queue))
-							mp_cpus_call_action();
-						handle_pending_TLB_flushes();
-					}
-					cpu_pause();
-					goto queue_call;
+			intrs_inner = mp_call_head_lock(cqp);
+			if (callp == NULL) {
+				mp_call_head_unlock(cqp, intrs_inner);
+				KERNEL_DEBUG_CONSTANT(
 					TRACE_MP_CPUS_CALL_NOBUF,
 					cpu, 0, 0, 0, 0);
+				if (!intrs_inner) {
+					/* Sniffing w/o locking */
+					if (!queue_empty(&cqp->queue))
+						mp_cpus_call_action();
 					handle_pending_TLB_flushes();
 				}
-				callp->countp = &mp_cpus_calls;
+				if (mp_spin_timeout(tsc_spin_start))
+					panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
						tsc_spin_start, rdtsc64());
+				goto queue_call;
 			}
+			callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
 			callp->func = action_func;
 			callp->arg0 = arg0;
 			callp->arg1 = arg1;
 			mp_call_enqueue_locked(cqp, callp);
-			mp_cpus_signals++;
+			cpus_call_count++;
 			cpus_called |= cpu_to_cpumask(cpu);
 			i386_signal_cpu(cpu, MP_CALL, ASYNC);
-			mp_call_head_unlock(cqp, intrs_enabled);
+			mp_call_head_unlock(cqp, intrs_inner);
 			if (mode == SYNC) {
-				mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
+				mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
 			}
 		}
 	}
+	if (topo_lock) {
+		simple_unlock(&x86_topo_lock);
+		ml_set_interrupts_enabled(intrs_enabled);
+	}
 
 	/* Call locally if mode not SYNC */
 	if (mode != SYNC && call_self ) {
 		KERNEL_DEBUG_CONSTANT(
 			TRACE_MP_CPUS_CALL_LOCAL,
-			VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
+			VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
 		if (action_func != NULL) {
 			ml_set_interrupts_enabled(FALSE);
 			action_func(arg0, arg1);
@@ -1215,28 +1336,31 @@ mp_cpus_call1(
 	}
 
 	/* For ASYNC, now wait for all signaled cpus to complete their calls */
-	if (mode == ASYNC) {
-		mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
-	}
+	if (mode == ASYNC)
+		mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
+
+	/* Safe to allow pre-emption now */
+	mp_enable_preemption();
 
 out:
-	cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);
+	if (call_self){
+		cpus_called |= cpu_to_cpumask(cpu);
+		cpus_call_count++;
+	}
 
 	if (cpus_calledp)
 		*cpus_calledp = cpus_called;
-	if (cpus_notcalledp)
-		*cpus_notcalledp = cpus_notcalled;
 
 	KERNEL_DEBUG_CONSTANT(
 		TRACE_MP_CPUS_CALL | DBG_FUNC_END,
-		cpu, cpus_called, cpus_notcalled, 0, 0);
+		cpus_call_count, cpus_called, 0, 0, 0);
 
-	return cpu;
+	return (cpu_t) cpus_call_count;
 }
 
 
 static void
-mp_broadcast_action(void)
+mp_broadcast_action(__unused void *null)
 {
 	/* call action function */
 	if (mp_bc_action_func != NULL)
@@ -1275,16 +1399,14 @@ mp_broadcast(
 	/*
 	 * signal other processors, which will call mp_broadcast_action()
 	 */
-	simple_lock(&x86_topo_lock);
-	mp_bc_ncpus = i386_active_cpus();	/* total including this cpu */
-	mp_bc_count = mp_bc_ncpus;
-	i386_signal_cpus(MP_BROADCAST, ASYNC);
+	mp_bc_count = real_ncpus;		/* assume max possible active */
+	mp_bc_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, *mp_broadcast_action, NULL) + 1;
+	atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus);	/* subtract inactive */
 
 	/* call executor function on this cpu */
-	mp_broadcast_action();
-	simple_unlock(&x86_topo_lock);
+	mp_broadcast_action(NULL);
 
-	/* block for all cpus to have run action_func */
+	/* block for other cpus to have run action_func */
 	if (mp_bc_ncpus > 1)
 		thread_block(THREAD_CONTINUE_NULL);
 	else
@@ -1294,6 +1416,30 @@ mp_broadcast(
 	lck_mtx_unlock(&mp_bc_lock);
 }
 
+void
+mp_cpus_kick(cpumask_t cpus)
+{
+	cpu_t		cpu;
+	boolean_t	intrs_enabled = FALSE;
+
+	intrs_enabled = ml_set_interrupts_enabled(FALSE);
+	mp_safe_spin_lock(&x86_topo_lock);
+
+	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
+		if ((cpu == (cpu_t) cpu_number())
+			|| ((cpu_to_cpumask(cpu) & cpus) == 0)
+			|| (!cpu_datap(cpu)->cpu_running))
+		{
+				continue;
+		}
+
+		lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
+	}
+
+	simple_unlock(&x86_topo_lock);
+	ml_set_interrupts_enabled(intrs_enabled);
+}
+
 void
 i386_activate_cpu(void)
 {
@@ -1313,8 +1459,6 @@ i386_activate_cpu(void)
 	flush_tlb_raw();
 }
 
-extern void etimer_timer_expire(void	*arg);
-
 void
 i386_deactivate_cpu(void)
 {
@@ -1335,7 +1479,7 @@ i386_deactivate_cpu(void)
 	 * and poke it in case there's a sooner deadline for it to schedule.
 	 */
 	timer_queue_shutdown(&cdp->rtclock_timer.queue);
-	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);
+	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
 
 	/*
 	 * Open an interrupt window
@@ -1364,6 +1508,7 @@ int	pmsafe_debug	= 1;
 
 #if	MACH_KDP
 volatile boolean_t	mp_kdp_trap = FALSE;
+volatile boolean_t	mp_kdp_is_NMI = FALSE;
 volatile unsigned long	mp_kdp_ncpus;
 boolean_t		mp_kdp_state;
 
@@ -1378,6 +1523,11 @@ mp_kdp_enter(void)
 
 	DBG("mp_kdp_enter()\n");
 
+#if DEBUG
+	if (!smp_initialized)
+		simple_lock_init(&mp_kdp_lock, 0);
+#endif
+
 	/*
 	 * Here to enter the debugger.
	 * In case of races, only one cpu is allowed to enter kdp after
@@ -1435,10 +1585,7 @@ mp_kdp_enter(void)
 		 * "unsafe-to-interrupt" points such as the trampolines,
		 * but neither do we want to lose state by waiting too long.
		 */
-		tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);
-
-		if (virtualized)
-			tsc_timeout = ~0ULL;
+		tsc_timeout = rdtsc64() + (LockTimeOutTSC);
 
 		while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
 			/*
@@ -1454,12 +1601,22 @@ mp_kdp_enter(void)
 		 * interrupt them with an NMI via the local APIC.
 		 */
 		if (mp_kdp_ncpus != ncpus) {
+			DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
 			for (cpu = 0; cpu < real_ncpus; cpu++) {
 				if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
 					continue;
 				if (cpu_signal_pending(cpu, MP_KDP))
 					cpu_NMI_interrupt(cpu);
 			}
+			/* Wait again for the same timeout */
+			tsc_timeout = rdtsc64() + (LockTimeOutTSC);
+			while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
+				handle_pending_TLB_flushes();
+				cpu_pause();
+			}
+			if (mp_kdp_ncpus != ncpus) {
+				panic("mp_kdp_enter() timed-out waiting after NMI");
+			}
 		}
 	}
 	else
@@ -1522,8 +1679,7 @@ static void
 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
 {
 	DBG("mp_kdp_wait()\n");
-	/* If an I/O port has been specified as a debugging aid, issue a read */
-	panic_io_port_read();
+	current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
 
 #if CONFIG_MCA
 	/* If we've trapped due to a machine-check, save MCA registers */
@@ -1559,7 +1715,7 @@ mp_kdp_exit(void)
 	debugger_exit_time = mach_absolute_time();
 
 	mp_kdp_trap = FALSE;
-	__asm__ volatile("mfence");
+	mfence();
 
 	/* Wait other processors to stop spinning. XXX needs timeout */
 	DBG("mp_kdp_exit() waiting for processors to resume\n");
@@ -1583,10 +1739,39 @@ mp_kdp_exit(void)
 	(void) ml_set_interrupts_enabled(mp_kdp_state);
 	postcode(0);
 }
+
+#define TRAP_DEBUGGER __asm__ volatile("int3")
+
+kern_return_t
+DebuggerWithCallback(kern_return_t (*callback) (void*),
+					 void *callback_context,
+					 boolean_t proceed_on_sync_failure)
+{
+	simple_lock(&debugger_callback_lock);
+
+	struct debugger_callback callback_buf = {
+		.callback = callback,
+		.callback_context = callback_context,
+		.proceed_on_sync_failure = proceed_on_sync_failure,
+		.error = KERN_FAILURE
+	};
+
+	assert(debugger_callback == NULL);
+	debugger_callback = &callback_buf;
+
+	TRAP_DEBUGGER;
+
+	debugger_callback = NULL;
+
+	simple_unlock(&debugger_callback_lock);
+
+	return callback_buf.error;
+}
+
 #endif	/* MACH_KDP */
 
 boolean_t
-mp_recent_debugger_activity() {
+mp_recent_debugger_activity(void) {
 	uint64_t abstime = mach_absolute_time();
 	return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
 			((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
@@ -1624,11 +1809,8 @@ slave_machine_init(void *param)
 		 * Cold start
 		 */
 		clock_init();
-		cpu_machine_init();	/* Interrupts enabled hereafter */
-		mp_cpus_call_cpu_init();
-	} else {
-		cpu_machine_init();	/* Interrupts enabled hereafter */
 	}
+	cpu_machine_init();	/* Interrupts enabled hereafter */
 }
 
 #undef cpu_number
@@ -1699,7 +1881,7 @@ _cpu_warm_setup(
 {
 	cpu_warm_data_t	cwdp = (cpu_warm_data_t)arg;
 
-	timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL);
+	timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
 	cwdp->cwd_result = 0;
 
 	return;
@@ -1751,3 +1933,22 @@ ml_interrupt_prewarm(
 		return cwd.cwd_result;
 	}
 }
+
+#if DEBUG || DEVELOPMENT
+void
+kernel_spin(uint64_t spin_ns)
+{
+	boolean_t	istate;
+	uint64_t	spin_abs;
+	uint64_t	deadline;
+
+	kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
+	istate = ml_set_interrupts_enabled(FALSE);
+	nanoseconds_to_absolutetime(spin_ns, &spin_abs);
+	deadline = mach_absolute_time() + spin_ns;
+	while (mach_absolute_time() < deadline)
+		cpu_pause();
+	ml_set_interrupts_enabled(istate);
+	kprintf("kernel_spin() continuing\n");
+}
+#endif
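
The central change in this diff replaces the cross-call completion counter with a per-CPU response bitmask (callp->maskp / cpus_responded): each responder sets its own bit with i_bit_set(), and when mp_cpus_call_wait() hits the TSC spin timeout it can compute exactly which CPUs never answered and NMI only those via mp_cpus_NMIPI(). The standalone user-space sketch below models just that bookkeeping with C11 atomics and pthreads; it is illustrative only, the names here (fake_cross_call, NCPUS) are invented for the example, and none of this code is part of the patch.

/*
 * Minimal user-space model of the completion-mask pattern used by
 * mp_cpus_call1(): each "CPU" (thread) ORs its bit into a shared response
 * mask, and the initiator spins until the response mask equals the set of
 * CPUs it called, reporting any non-responders on timeout.
 * Hypothetical example code, not kernel source.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NCPUS 4
typedef uint64_t cpumask_t;
#define cpu_to_cpumask(c) ((cpumask_t)1 << (c))

static _Atomic cpumask_t cpus_responded;

static void *fake_cross_call(void *arg)
{
	int cpu = (int)(intptr_t)arg;
	/* ... the action function would run here ... */
	/* analogue of i_bit_set(cpu_number(), call.maskp) */
	atomic_fetch_or(&cpus_responded, cpu_to_cpumask(cpu));
	return NULL;
}

int main(void)
{
	pthread_t th[NCPUS];
	cpumask_t cpus_called = 0;
	struct timespec start, now;

	/* "CPU 0" is the initiator; call every other CPU. */
	for (int cpu = 1; cpu < NCPUS; cpu++) {
		cpus_called |= cpu_to_cpumask(cpu);
		pthread_create(&th[cpu], NULL, fake_cross_call, (void *)(intptr_t)cpu);
	}

	/* Spin until every called CPU has set its bit, with a coarse timeout. */
	clock_gettime(CLOCK_MONOTONIC, &start);
	while (atomic_load(&cpus_responded) != cpus_called) {
		clock_gettime(CLOCK_MONOTONIC, &now);
		if (now.tv_sec - start.tv_sec > 2) {
			/* analogue of the mp_cpus_call_wait() timeout path */
			cpumask_t missing = cpus_called & ~atomic_load(&cpus_responded);
			fprintf(stderr, "timeout, unresponsive mask: 0x%llx\n",
			    (unsigned long long)missing);
			return 1;
		}
	}
	printf("all %d target CPUs responded\n", NCPUS - 1);

	for (int cpu = 1; cpu < NCPUS; cpu++)
		pthread_join(th[cpu], NULL);
	return 0;
}

Compared with the old counter, the mask costs the same to maintain (one atomic OR per responder) but preserves the identity of the laggards, which is what the new NMI-and-panic path in the patch needs.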