X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3903760236c30e3b5ace7a4eefac3a269d68957c..d26ffc64f583ab2d29df48f13518685602bc8832:/osfmk/i386/mp.c

diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c
index c0a70f349..3b7232687 100644
--- a/osfmk/i386/mp.c
+++ b/osfmk/i386/mp.c
@@ -29,7 +29,6 @@
  * @OSF_COPYRIGHT@
  */
 
-#include <mach_rt.h>
 #include <mach_kdp.h>
 #include <mach_ldebug.h>
 #include <gprof.h>
@@ -78,13 +77,14 @@
 #endif
 #include <i386/acpi.h>
 
-#include <chud/chud_xnu.h>
-#include <chud/chud_xnu_private.h>
-
 #include <sys/kdebug.h>
 
 #include <console/serial_protos.h>
 
+#if MONOTONIC
+#include <kern/monotonic.h>
+#endif /* MONOTONIC */
+
 #if MP_DEBUG
 #define PAUSE delay(1000000)
 #define DBG(x...) kprintf(x)
@@ -112,7 +112,6 @@ void i386_cpu_IPI(int cpu);
 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
 #endif /* MACH_KDP */
 
-static boolean_t mp_safe_spin_lock(usimple_lock_t lock);
 #if MACH_KDP
 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
 #endif /* MACH_KDP */
@@ -122,7 +121,10 @@
 boolean_t	smp_initialized = FALSE;
 uint32_t	TSC_sync_margin = 0xFFF;
 volatile boolean_t	force_immediate_debugger_NMI = FALSE;
 volatile boolean_t	pmap_tlb_flush_timeout = FALSE;
-decl_simple_lock_data(,mp_kdp_lock);
+#if DEBUG || DEVELOPMENT
+boolean_t	mp_interrupt_watchdog_enabled = TRUE;
+uint32_t	mp_interrupt_watchdog_events = 0;
+#endif
 
 decl_simple_lock_data(,debugger_callback_lock);
 struct debugger_callback *debugger_callback = NULL;
@@ -168,6 +170,8 @@ lck_mtx_ext_t	mp_bc_lock_ext;
 static volatile int	debugger_cpu = -1;
 volatile long	 NMIPI_acks = 0;
 volatile long	 NMI_count = 0;
+static NMI_reason_t	NMI_panic_reason = NONE;
+static int		vector_timed_out;
 
 extern void	NMI_cpus(void);
@@ -175,8 +179,6 @@ static void	mp_cpus_call_init(void);
 static void	mp_cpus_call_action(void);
 static void	mp_call_PM(void);
 
-static boolean_t	mp_cpus_call_wait_timeout = FALSE;
-
 char		mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
 
 /* PAL-related routines */
@@ -184,7 +186,7 @@ boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
 		int ipi_vector, i386_intr_func_t ipi_handler);
 void i386_start_cpu(int lapic_id, int cpu_num);
 void i386_send_NMI(int cpu);
-
+void NMIPI_enable(boolean_t);
 #if GPROF
 /*
  * Initialize dummy structs for profiling. These aren't used but
@@ -227,7 +229,6 @@ static void free_warm_timer_call(timer_call_t call);
 void
 smp_init(void)
 {
-	simple_lock_init(&mp_kdp_lock, 0);
 	simple_lock_init(&mp_rv_lock, 0);
 	simple_lock_init(&debugger_callback_lock, 0);
 	lck_grp_attr_setdefault(&smp_lck_grp_attr);
@@ -248,6 +249,15 @@ smp_init(void)
 	mp_cpus_call_init();
 	mp_cpus_call_cpu_init(master_cpu);
 
+#if DEBUG || DEVELOPMENT
+	if (PE_parse_boot_argn("interrupt_watchdog",
+	    &mp_interrupt_watchdog_enabled,
+	    sizeof(mp_interrupt_watchdog_enabled))) {
+		kprintf("Interrupt watchdog %sabled\n",
+		    mp_interrupt_watchdog_enabled ? "en" : "dis");
+	}
+#endif
+
 	if (PE_parse_boot_argn("TSC_sync_margin",
 	    &TSC_sync_margin, sizeof(TSC_sync_margin))) {
 		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
@@ -444,7 +454,7 @@ intel_startCPU(
 	 * Initialize (or re-initialize) the descriptor tables for this cpu.
 	 * Propagate processor mode to slave.
 	 */
-	cpu_desc_init64(cpu_datap(slot_num));
+	cpu_desc_init(cpu_datap(slot_num));
 
 	/* Serialize use of the slave boot stack, etc. */
 	lck_mtx_lock(&mp_cpu_boot_lock);
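/*
 * [Sketch, not part of the patch] NMI_panic_reason gives NMI receivers a
 * typed record of why the NMIPI was sent. The NMI_reason_t typedef is not
 * shown in this diff (it lives in a header, presumably osfmk/i386/mp.h);
 * a minimal shape consistent with the five values used in this patch:
 */
typedef enum {
	NONE = 0,		/* no NMIPI panic outstanding */
	SPINLOCK_TIMEOUT,	/* mp_safe_spin_lock() gave up */
	TLB_FLUSH_TIMEOUT,	/* remote TLB flush unacknowledged */
	CROSSCALL_TIMEOUT,	/* mp_cpus_call_wait() gave up */
	INTERRUPT_WATCHDOG	/* an ISR overran LockTimeOut */
} NMI_reason_t;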
@@ -539,10 +549,6 @@ cpu_signal_handler(x86_saved_state_t *regs)
 			DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
 			i_bit_clear(MP_TLB_FLUSH, my_word);
 			pmap_update_interrupt();
-		} else if (i_bit(MP_CHUD, my_word)) {
-			DBGLOG(cpu_handle,my_cpu,MP_CHUD);
-			i_bit_clear(MP_CHUD, my_word);
-			chudxnu_cpu_signal_handler();
 		} else if (i_bit(MP_CALL, my_word)) {
 			DBGLOG(cpu_handle,my_cpu,MP_CALL);
 			i_bit_clear(MP_CALL, my_word);
@@ -570,6 +576,8 @@ static int
 NMIInterruptHandler(x86_saved_state_t *regs)
 {
 	void		*stackptr;
+	char		pstr[192];
+	uint64_t	now = mach_absolute_time();
 
 	if (panic_active() && !panicDebugging) {
 		if (pmsafe_debug)
@@ -586,29 +594,34 @@ NMIInterruptHandler(x86_saved_state_t *regs)
 	if (cpu_number() == debugger_cpu)
 		goto NMExit;
 
-	if (spinlock_timed_out) {
-		char pstr[192];
-		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
-		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
-	} else if (mp_cpus_call_wait_timeout) {
-		char pstr[192];
-		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, this CPU timed-out during cross-call\n", cpu_number());
+	if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
+		snprintf(&pstr[0], sizeof(pstr),
+			"Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
+			cpu_number(), now, spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
 		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
-	} else if (pmap_tlb_flush_timeout == TRUE) {
-		char pstr[128];
-		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
+	} else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
+		snprintf(&pstr[0], sizeof(pstr),
+			"Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
+			cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
 		panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
-	}
-
+	} else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
+		snprintf(&pstr[0], sizeof(pstr),
+			"Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
+			cpu_number(), now);
+		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
+	} else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
+		snprintf(&pstr[0], sizeof(pstr),
+			"Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
+			cpu_number(), now, vector_timed_out);
+		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
+	}
+
 #if MACH_KDP
 	if (pmsafe_debug && !kdp_snapshot)
 		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
 	current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
 	i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
-	if (pmap_tlb_flush_timeout ||
-	    spinlock_timed_out ||
-	    mp_cpus_call_wait_timeout ||
-	    panic_active()) {
+	if (panic_active() || NMI_panic_reason != NONE) {
 		mp_kdp_wait(FALSE, TRUE);
 	} else if (!mp_kdp_trap &&
 		   !mp_kdp_is_NMI &&
@@ -679,7 +692,7 @@ NMI_cpus(void)
 	intrs_enabled = ml_set_interrupts_enabled(FALSE);
 
 	for (cpu = 0; cpu < real_ncpus; cpu++) {
-		if (!cpu_datap(cpu)->cpu_running)
+		if (!cpu_is_running(cpu))
 			continue;
 		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
 		cpu_NMI_interrupt(cpu);
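/*
 * [Sketch, not part of the patch] The hunks above and below replace
 * open-coded cpu_datap(cpu)->cpu_running tests with cpu_is_running().
 * The macro itself is defined in a header outside this diff (presumably
 * osfmk/i386/cpu_data.h); the assumed equivalent:
 */
#define cpu_is_running(cpu)	(cpu_datap(cpu)->cpu_running)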
@@ -791,7 +804,7 @@ mp_spin_timeout(uint64_t tsc_start)
  * are still serviced if interrupts are masked while we spin.
  * Returns current interrupt state.
  */
-static boolean_t
+boolean_t
 mp_safe_spin_lock(usimple_lock_t lock)
 {
 	if (ml_get_interrupts_enabled()) {
@@ -807,11 +820,9 @@ mp_safe_spin_lock(usimple_lock_t lock)
 				lock->interlock.lock_data;
 			spinlock_timed_out = lock;
 			lock_cpu = spinlock_timeout_NMI(lowner);
-			panic("mp_safe_spin_lock() timed out,"
-			      " lock: %p, owner thread: 0x%lx,"
-			      " current_thread: %p, owner on CPU 0x%x",
-			      lock, lowner,
-			      current_thread(), lock_cpu);
+			NMIPI_panic(cpu_to_cpumask(lock_cpu), SPINLOCK_TIMEOUT);
+			panic("mp_safe_spin_lock() timed out, lock: %p, owner thread: 0x%lx, current_thread: %p, owner on CPU 0x%x, time: %llu",
+			      lock, lowner, current_thread(), lock_cpu, mach_absolute_time());
 		}
 	}
 	return FALSE;
@@ -1008,18 +1019,34 @@ mp_call_head_lock(mp_call_queue_t *cqp)
 	return intrs_enabled;
 }
 
+/*
+ * Deliver an NMIPI to a set of processors to cause them to panic.
+ */
 void
-mp_cpus_NMIPI(cpumask_t cpu_mask) {
+NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why) {
 	unsigned int cpu, cpu_bit;
 	uint64_t deadline;
 
+	NMIPI_enable(TRUE);
+	NMI_panic_reason = why;
+
 	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
-		if (cpu_mask & cpu_bit)
-			cpu_NMI_interrupt(cpu);
+		if ((cpu_mask & cpu_bit) == 0)
+			continue;
+		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
+		cpu_NMI_interrupt(cpu);
+	}
+
+	/* Wait (only so long) for NMI'ed cpus to respond */
+	deadline = mach_absolute_time() + LockTimeOut;
+	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+		if ((cpu_mask & cpu_bit) == 0)
+			continue;
+		while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
+		        mach_absolute_time() < deadline) {
+			cpu_pause();
+		}
 	}
-	deadline = mach_absolute_time() + (LockTimeOut);
-	while (mach_absolute_time() < deadline)
-		cpu_pause();
 }
 
 #if MACH_ASSERT
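/*
 * [Sketch, not part of the patch] NMIPI_panic() centralizes the fail-path
 * idiom used by mp_safe_spin_lock() above and mp_cpus_call_wait() below:
 * publish a reason, NMI the unresponsive cpus so their NMI handlers panic
 * with matching diagnostics, then panic locally once the targets have had
 * up to LockTimeOut to acknowledge. A hypothetical caller:
 */
static void
example_timeout_path(cpumask_t unresponsive)	/* illustrative only */
{
	/* Remote cpus take the CROSSCALL_TIMEOUT branch in
	 * NMIInterruptHandler() and back-trace before parking. */
	NMIPI_panic(unresponsive, CROSSCALL_TIMEOUT);
	panic("example: cpus 0x%llx unresponsive",
	      (unsigned long long) unresponsive);
}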
@@ -1200,9 +1227,8 @@ mp_cpus_call_wait(boolean_t intrs_enabled,
 		if (mp_spin_timeout(tsc_spin_start)) {
 			cpumask_t	cpus_unresponsive;
 
-			mp_cpus_call_wait_timeout = TRUE;
 			cpus_unresponsive = cpus_called & ~(*cpus_responded);
-			mp_cpus_NMIPI(cpus_unresponsive);
+			NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
 			panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
 				cpus_unresponsive);
 		}
@@ -1260,7 +1286,7 @@ mp_cpus_call1(
 	}
 	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
 		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
-		    !cpu_datap(cpu)->cpu_running)
+		    !cpu_is_running(cpu))
 			continue;
 		tsc_spin_start = rdtsc64();
 		if (cpu == (cpu_t) cpu_number()) {
@@ -1428,7 +1454,7 @@ mp_cpus_kick(cpumask_t cpus)
 	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
 		if ((cpu == (cpu_t) cpu_number())
 			|| ((cpu_to_cpumask(cpu) & cpus) == 0)
-			|| (!cpu_datap(cpu)->cpu_running))
+			|| !cpu_is_running(cpu))
 		{
 			continue;
 		}
@@ -1452,7 +1478,7 @@ i386_activate_cpu(void)
 		return;
 	}
 
-	simple_lock(&x86_topo_lock);
+	mp_safe_spin_lock(&x86_topo_lock);
 	cdp->cpu_running = TRUE;
 	started_cpu();
 	simple_unlock(&x86_topo_lock);
@@ -1470,7 +1496,7 @@ i386_deactivate_cpu(void)
 		TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
 		0, 0, 0, 0, 0);
 
-	simple_lock(&x86_topo_lock);
+	mp_safe_spin_lock(&x86_topo_lock);
 	cdp->cpu_running = FALSE;
 	simple_unlock(&x86_topo_lock);
@@ -1481,6 +1507,10 @@ i386_deactivate_cpu(void)
 	timer_queue_shutdown(&cdp->rtclock_timer.queue);
 	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
 
+#if MONOTONIC
+	mt_cpu_down(cdp);
+#endif /* MONOTONIC */
+
 	/*
 	 * Open an interrupt window
 	 * and ensure any pending IPI or timer is serviced
@@ -1514,7 +1544,7 @@
 boolean_t	mp_kdp_state;
 
 void
-mp_kdp_enter(void)
+mp_kdp_enter(boolean_t proceed_on_failure)
 {
 	unsigned int	cpu;
 	unsigned int	ncpus = 0;
@@ -1523,11 +1553,6 @@ mp_kdp_enter(void)
 
 	DBG("mp_kdp_enter()\n");
 
-#if DEBUG
-	if (!smp_initialized)
-		simple_lock_init(&mp_kdp_lock, 0);
-#endif
-
 	/*
 	 * Here to enter the debugger.
 	 * In case of races, only one cpu is allowed to enter kdp after
@@ -1542,26 +1567,44 @@ mp_kdp_enter(void)
 		return;
 	}
 
-	cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
-	simple_lock(&mp_kdp_lock);
-
-	if (pmsafe_debug && !kdp_snapshot)
-		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
+	uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
+	int locked = 0;
+	while (!locked || mp_kdp_trap) {
+		if (locked) {
+			simple_unlock(&x86_topo_lock);
+		}
+		if (proceed_on_failure) {
+			if (mach_absolute_time() - start_time > 500000000ll) {
+				kprintf("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
+				break;
+			}
+			locked = simple_lock_try(&x86_topo_lock);
+			if (!locked) {
+				cpu_pause();
+			}
+		} else {
+			mp_safe_spin_lock(&x86_topo_lock);
+			locked = TRUE;
+		}
 
-	while (mp_kdp_trap) {
-		simple_unlock(&mp_kdp_lock);
-		DBG("mp_kdp_enter() race lost\n");
+		if (locked && mp_kdp_trap) {
+			simple_unlock(&x86_topo_lock);
+			DBG("mp_kdp_enter() race lost\n");
 #if MACH_KDP
-		mp_kdp_wait(TRUE, FALSE);
+			mp_kdp_wait(TRUE, FALSE);
 #endif
-		simple_lock(&mp_kdp_lock);
+			locked = FALSE;
+		}
 	}
+
+	if (pmsafe_debug && !kdp_snapshot)
+		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
+
 	debugger_cpu = my_cpu;
 	ncpus = 1;
-	mp_kdp_ncpus = 1;	/* self */
+	atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
 	mp_kdp_trap = TRUE;
 	debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
-	simple_unlock(&mp_kdp_lock);
 
 	/*
 	 * Deliver a nudge to other cpus, counting how many
@@ -1569,7 +1612,7 @@ mp_kdp_enter(void)
 	DBG("mp_kdp_enter() signaling other processors\n");
 	if (force_immediate_debugger_NMI == FALSE) {
 		for (cpu = 0; cpu < real_ncpus; cpu++) {
-			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
+			if (cpu == my_cpu || !cpu_is_running(cpu))
 				continue;
 			ncpus++;
 			i386_signal_cpu(cpu, MP_KDP, ASYNC);
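/*
 * [Sketch, not part of the patch] cpu_to_cpumask(), used above and below to
 * build cpu sets such as cpus_NMI_pending, is assumed to be the usual
 * one-bit-per-cpu mapping (the real macro lives in a header, presumably
 * osfmk/i386/mp.h):
 */
#define cpu_to_cpumask(cpu)	(((cpumask_t) 1ULL) << (cpu))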
@@ -1598,15 +1641,22 @@ mp_kdp_enter(void)
 			cpu_pause();
 		}
 		/* If we've timed out, and some processor(s) are still unresponsive,
-		 * interrupt them with an NMI via the local APIC.
+		 * interrupt them with an NMI via the local APIC, iff a panic is
+		 * in progress.
 		 */
+		if (panic_active()) {
+			NMIPI_enable(TRUE);
+		}
 		if (mp_kdp_ncpus != ncpus) {
+			cpumask_t cpus_NMI_pending = 0;
 			DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
 			for (cpu = 0; cpu < real_ncpus; cpu++) {
-				if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
+				if (cpu == my_cpu || !cpu_is_running(cpu))
 					continue;
-				if (cpu_signal_pending(cpu, MP_KDP))
+				if (cpu_signal_pending(cpu, MP_KDP)) {
+					cpus_NMI_pending |= cpu_to_cpumask(cpu);
 					cpu_NMI_interrupt(cpu);
+				}
 			}
 			/* Wait again for the same timeout */
 			tsc_timeout = rdtsc64() + (LockTimeOutTSC);
@@ -1615,17 +1665,21 @@ mp_kdp_enter(void)
 				cpu_pause();
 			}
 			if (mp_kdp_ncpus != ncpus) {
-				panic("mp_kdp_enter() timed-out waiting after NMI");
+				kdb_printf("mp_kdp_enter(): %llu, %lu, %u TIMED-OUT WAITING FOR NMI-ACK, PROCEEDING\n", cpus_NMI_pending, mp_kdp_ncpus, ncpus);
 			}
 		}
 	}
 	else
 		for (cpu = 0; cpu < real_ncpus; cpu++) {
-			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
+			if (cpu == my_cpu || !cpu_is_running(cpu))
 				continue;
 			cpu_NMI_interrupt(cpu);
 		}
 
+	if (locked) {
+		simple_unlock(&x86_topo_lock);
+	}
+
 	DBG("mp_kdp_enter() %d processors done %s\n",
 	    (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
 
@@ -1679,8 +1733,7 @@ static void
 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
 {
 	DBG("mp_kdp_wait()\n");
-	/* If an I/O port has been specified as a debugging aid, issue a read */
-	panic_io_port_read();
+	current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
 
 #if CONFIG_MCA
 	/* If we've trapped due to a machine-check, save MCA registers */
@@ -1738,35 +1791,7 @@ mp_kdp_exit(void)
 
 	DBG("mp_kdp_exit() done\n");
 	(void) ml_set_interrupts_enabled(mp_kdp_state);
-	postcode(0);
-}
-
-#define TRAP_DEBUGGER __asm__ volatile("int3")
-
-kern_return_t
-DebuggerWithCallback(kern_return_t (*callback) (void*),
-		     void *callback_context,
-		     boolean_t proceed_on_sync_failure)
-{
-	simple_lock(&debugger_callback_lock);
-
-	struct debugger_callback callback_buf = {
-		.callback = callback,
-		.callback_context = callback_context,
-		.proceed_on_sync_failure = proceed_on_sync_failure,
-		.error = KERN_FAILURE
-	};
-
-	assert(debugger_callback == NULL);
-	debugger_callback = &callback_buf;
-
-	TRAP_DEBUGGER;
-
-	debugger_callback = NULL;
-
-	simple_unlock(&debugger_callback_lock);
-
-	return callback_buf.error;
+	postcode(MP_KDP_EXIT);
 }
 
 #endif	/* MACH_KDP */
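/*
 * [Sketch, not part of the patch] NMIPI_enable() is only declared in this
 * file. Judging from its call sites, armed immediately before panic or
 * debugger NMIPIs are sent, it presumably gates NMI-IPI delivery in the
 * local-APIC layer. A plausible minimal shape, assuming a flag that the
 * NMI send path consults:
 */
static boolean_t NMIPIs_enabled = FALSE;	/* assumed; would live with the lapic code */

void
NMIPI_enable(boolean_t enable)
{
	NMIPIs_enabled = enable;	/* consulted (by assumption) in i386_send_NMI() */
}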
@@ -1942,14 +1967,95 @@ kernel_spin(uint64_t spin_ns)
 	boolean_t	istate;
 	uint64_t	spin_abs;
 	uint64_t	deadline;
+	cpu_data_t	*cdp;
 
 	kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
 	istate = ml_set_interrupts_enabled(FALSE);
+	cdp = current_cpu_datap();
 	nanoseconds_to_absolutetime(spin_ns, &spin_abs);
+
+	/* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
+	cdp->cpu_int_event_time = mach_absolute_time();
+	cdp->cpu_int_state = (void *) USER_STATE(current_thread());
+
 	deadline = mach_absolute_time() + spin_ns;
 	while (mach_absolute_time() < deadline)
 		cpu_pause();
+
+	cdp->cpu_int_event_time = 0;
+	cdp->cpu_int_state = NULL;
+
 	ml_set_interrupts_enabled(istate);
 	kprintf("kernel_spin() continuing\n");
 }
+
+/*
+ * Called from the scheduler's maintenance thread,
+ * scan running processors for long-running ISRs and:
+ *  - panic if longer than LockTimeOut, or
+ *  - log if more than a quantum.
+ */
+void
+mp_interrupt_watchdog(void)
+{
+	cpu_t			cpu;
+	boolean_t		intrs_enabled = FALSE;
+	uint16_t		cpu_int_num;
+	uint64_t		cpu_int_event_time;
+	uint64_t		cpu_rip;
+	uint64_t		cpu_int_duration;
+	uint64_t		now;
+	x86_saved_state_t	*cpu_int_state;
+
+	if (__improbable(!mp_interrupt_watchdog_enabled))
+		return;
+
+	intrs_enabled = ml_set_interrupts_enabled(FALSE);
+	now = mach_absolute_time();
+	/*
+	 * While timeouts are not suspended,
+	 * check all other processors for long outstanding interrupt handling.
+	 */
+	for (cpu = 0;
+	     cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
+	     cpu++) {
+		if ((cpu == (cpu_t) cpu_number()) ||
+		    (!cpu_is_running(cpu)))
+			continue;
+		cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
+		if (cpu_int_event_time == 0)
+			continue;
+		if (__improbable(now < cpu_int_event_time))
+			continue;	/* skip due to inter-processor skew */
+		cpu_int_state = cpu_datap(cpu)->cpu_int_state;
+		if (__improbable(cpu_int_state == NULL))
+			/* The interrupt may have been dismissed */
+			continue;
+
+		/* Here with a cpu handling an interrupt */
+
+		cpu_int_duration = now - cpu_int_event_time;
+		if (__improbable(cpu_int_duration > LockTimeOut)) {
+			cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
+			cpu_rip = saved_state64(cpu_int_state)->isf.rip;
+			vector_timed_out = cpu_int_num;
+			NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
+			panic("Interrupt watchdog, "
+				"cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
+				cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
+			/* NOT REACHED */
+		} else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
+			mp_interrupt_watchdog_events++;
+			cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
+			cpu_rip = saved_state64(cpu_int_state)->isf.rip;
+			ml_set_interrupts_enabled(intrs_enabled);
+			printf("Interrupt watchdog, "
+				"cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
+				cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
+			return;
+		}
+	}
+
+	ml_set_interrupts_enabled(intrs_enabled);
+}
 #endif
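/*
 * [Sketch, not part of the patch] mp_interrupt_watchdog() relies on the
 * interrupt dispatch path maintaining two per-cpu fields, exactly as
 * kernel_spin() fakes above. The assumed protocol (the real bookkeeping
 * lives in the interrupt entry/exit code, not in this file):
 */
static inline void
interrupt_entry_bookkeeping(cpu_data_t *cdp, x86_saved_state_t *state)
{
	cdp->cpu_int_state = (void *) state;		/* where trapno/RIP are read */
	cdp->cpu_int_event_time = mach_absolute_time();	/* nonzero: ISR in progress */
}

static inline void
interrupt_exit_bookkeeping(cpu_data_t *cdp)
{
	cdp->cpu_int_event_time = 0;	/* watchdog skips cpus not in an ISR */
	cdp->cpu_int_state = NULL;
}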