X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/593a1d5fd87cdf5b46dd5fcb84467b432cea0f91..316670eb35587141e969394ae8537d66b9211e80:/osfmk/i386/mp.c?ds=sidebyside diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 507399783..b66399d2d 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -1,5 +1,4 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,7 +29,6 @@ */ #include -#include #include #include #include @@ -40,6 +38,7 @@ #include #include +#include #include #include #include @@ -47,47 +46,38 @@ #include #include #include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include #include +#include #include +#include #include +#include +#include +#include +#include +#include +#include #include +#if CONFIG_MCA #include +#endif +#include #include #include #include -#if MACH_KDB -#include -#include -#include -#include -#include -#include -#include -#include -#endif #if MP_DEBUG #define PAUSE delay(1000000) @@ -97,54 +87,84 @@ #define PAUSE #endif /* MP_DEBUG */ -#define FULL_SLAVE_INIT (NULL) -#define FAST_SLAVE_INIT ((void *)(uintptr_t)1) +/* Debugging/test trace events: */ +#define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0) +#define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1) +#define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2) +#define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3) +#define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4) -void slave_boot_init(void); +#define ABS(v) (((v) > 0)?(v):-(v)) -#if MACH_KDB -static void mp_kdb_wait(void); -volatile boolean_t mp_kdb_trap = FALSE; -volatile long mp_kdb_ncpus = 0; -#endif +void slave_boot_init(void); +void i386_cpu_IPI(int cpu); -static void mp_kdp_wait(boolean_t flush); +static void mp_kdp_wait(boolean_t flush, boolean_t isNMI); static void mp_rendezvous_action(void); static void mp_broadcast_action(void); static boolean_t cpu_signal_pending(int cpu, mp_event_t event); -static int cpu_signal_handler(x86_saved_state_t *regs); static int NMIInterruptHandler(x86_saved_state_t *regs); -boolean_t smp_initialized = FALSE; +boolean_t smp_initialized = FALSE; +uint32_t TSC_sync_margin = 0xFFF; volatile boolean_t force_immediate_debugger_NMI = FALSE; volatile boolean_t pmap_tlb_flush_timeout = FALSE; - decl_simple_lock_data(,mp_kdp_lock); -decl_mutex_data(static, mp_cpu_boot_lock); +decl_lck_mtx_data(static, mp_cpu_boot_lock); +lck_mtx_ext_t mp_cpu_boot_lock_ext; /* Variables needed for MP rendezvous. 
*/ decl_simple_lock_data(,mp_rv_lock); -static void (*mp_rv_setup_func)(void *arg); -static void (*mp_rv_action_func)(void *arg); -static void (*mp_rv_teardown_func)(void *arg); -static void *mp_rv_func_arg; -static int mp_rv_ncpus; +static void (*mp_rv_setup_func)(void *arg); +static void (*mp_rv_action_func)(void *arg); +static void (*mp_rv_teardown_func)(void *arg); +static void *mp_rv_func_arg; +static volatile int mp_rv_ncpus; /* Cache-aligned barriers: */ static volatile long mp_rv_entry __attribute__((aligned(64))); static volatile long mp_rv_exit __attribute__((aligned(64))); static volatile long mp_rv_complete __attribute__((aligned(64))); +volatile uint64_t debugger_entry_time; +volatile uint64_t debugger_exit_time; +#if MACH_KDP +#include +extern int kdp_snapshot; +static struct _kdp_xcpu_call_func { + kdp_x86_xcpu_func_t func; + void *arg0, *arg1; + volatile long ret; + volatile uint16_t cpu; +} kdp_xcpu_call_func = { + .cpu = KDP_XCPU_NONE +}; + +#endif + /* Variables needed for MP broadcast. */ static void (*mp_bc_action_func)(void *arg); static void *mp_bc_func_arg; static int mp_bc_ncpus; static volatile long mp_bc_count; -decl_mutex_data(static, mp_bc_lock); +decl_lck_mtx_data(static, mp_bc_lock); +lck_mtx_ext_t mp_bc_lock_ext; static volatile int debugger_cpu = -1; +volatile long NMIPI_acks = 0; +static void mp_cpus_call_init(void); +static void mp_cpus_call_cpu_init(void); static void mp_cpus_call_action(void); +static void mp_call_PM(void); + +char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init + +/* PAL-related routines */ +boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler, + int ipi_vector, i386_intr_func_t ipi_handler); +void i386_start_cpu(int lapic_id, int cpu_num); +void i386_send_NMI(int cpu); #if GPROF /* @@ -165,123 +185,97 @@ struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars }; #define GPROF_INIT() #endif /* GPROF */ +static lck_grp_t smp_lck_grp; +static lck_grp_attr_t smp_lck_grp_attr; + +#define NUM_CPU_WARM_CALLS 20 +struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS]; +queue_head_t cpu_warm_call_list; +decl_simple_lock_data(static, cpu_warm_lock); + +typedef struct cpu_warm_data { + timer_call_t cwd_call; + uint64_t cwd_deadline; + int cwd_result; +} *cpu_warm_data_t; + +static void cpu_prewarm_init(void); +static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1); +static void _cpu_warm_setup(void *arg); +static timer_call_t grab_warm_timer_call(void); +static void free_warm_timer_call(timer_call_t call); + void smp_init(void) { simple_lock_init(&mp_kdp_lock, 0); simple_lock_init(&mp_rv_lock, 0); - mutex_init(&mp_cpu_boot_lock, 0); - mutex_init(&mp_bc_lock, 0); + lck_grp_attr_setdefault(&smp_lck_grp_attr); + lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr); + lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL); + lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL); console_init(); - /* Local APIC? 
*/ - if (!lapic_probe()) + if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler, + LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler)) return; - lapic_init(); - lapic_configure(); - lapic_set_intr_func(LAPIC_NMI_INTERRUPT, NMIInterruptHandler); - lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler); - cpu_thread_init(); GPROF_INIT(); DBGLOG_CPU_INIT(master_cpu); - slave_boot_init(); + mp_cpus_call_init(); + mp_cpus_call_cpu_init(); + if (PE_parse_boot_argn("TSC_sync_margin", + &TSC_sync_margin, sizeof(TSC_sync_margin))) { + kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin); + } else if (cpuid_vmm_present()) { + kprintf("TSC sync margin disabled\n"); + TSC_sync_margin = 0; + } smp_initialized = TRUE; + cpu_prewarm_init(); + return; } +typedef struct { + int target_cpu; + int target_lapic; + int starter_cpu; +} processor_start_info_t; +static processor_start_info_t start_info __attribute__((aligned(64))); + +/* + * Cache-alignment is to avoid cross-cpu false-sharing interference. + */ +static volatile long tsc_entry_barrier __attribute__((aligned(64))); +static volatile long tsc_exit_barrier __attribute__((aligned(64))); +static volatile uint64_t tsc_target __attribute__((aligned(64))); + /* * Poll a CPU to see when it has marked itself as running. */ static void mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay) { - while (iters-- > 0) { + while (iters-- > 0) { if (cpu_datap(slot_num)->cpu_running) - break; + break; delay(usecdelay); } } -kern_return_t -intel_startCPU( - int slot_num) -{ - - int i = 1000; - int lapic = cpu_to_lapic[slot_num]; - - assert(lapic != -1); - - DBGLOG_CPU_INIT(slot_num); - - DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic); - DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD); - - /* - * Initialize (or re-initialize) the descriptor tables for this cpu. - * Propagate processor mode to slave. - */ - if (cpu_mode_is64bit()) - cpu_desc_init64(cpu_datap(slot_num), FALSE); - else - cpu_desc_init(cpu_datap(slot_num), FALSE); - - /* Serialize use of the slave boot stack. */ - mutex_lock(&mp_cpu_boot_lock); - - mp_disable_preemption(); - if (slot_num == get_cpu_number()) { - mp_enable_preemption(); - mutex_unlock(&mp_cpu_boot_lock); - return KERN_SUCCESS; - } - - LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT); - LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT); - delay(10000); - - LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT); - LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12)); - delay(200); - - LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT); - LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12)); - delay(200); - -#ifdef POSTCODE_DELAY - /* Wait much longer if postcodes are displayed for a delay period. */ - i *= 10000; -#endif - mp_wait_for_cpu_up(slot_num, i, 10000); - - mp_enable_preemption(); - mutex_unlock(&mp_cpu_boot_lock); - - if (!cpu_datap(slot_num)->cpu_running) { - kprintf("Failed to start CPU %02d\n", slot_num); - printf("Failed to start CPU %02d, rebooting...\n", slot_num); - delay(1000000); - cpu_shutdown(); - return KERN_SUCCESS; - } else { - kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic); - return KERN_SUCCESS; - } -} - /* * Quickly bring a CPU back online which has been halted. */ kern_return_t intel_startCPU_fast(int slot_num) { - kern_return_t rc; + kern_return_t rc; /* * Try to perform a fast restart @@ -303,6 +297,7 @@ intel_startCPU_fast(int slot_num) * longer than a full restart would require so it should be more * than long enough. 
*/ + mp_wait_for_cpu_up(slot_num, 30000, 1); mp_enable_preemption(); @@ -312,51 +307,141 @@ intel_startCPU_fast(int slot_num) */ if (cpu_datap(slot_num)->cpu_running) return(KERN_SUCCESS); - else + else return(KERN_FAILURE); } -extern char slave_boot_base[]; -extern char slave_boot_end[]; -extern void slave_pstart(void); +static void +started_cpu(void) +{ + /* Here on the started cpu with cpu_running set TRUE */ + + if (TSC_sync_margin && + start_info.target_cpu == cpu_number()) { + /* + * I've just started-up, synchronize again with the starter cpu + * and then snap my TSC. + */ + tsc_target = 0; + atomic_decl(&tsc_entry_barrier, 1); + while (tsc_entry_barrier != 0) + ; /* spin for starter and target at barrier */ + tsc_target = rdtsc64(); + atomic_decl(&tsc_exit_barrier, 1); + } +} -void -slave_boot_init(void) +static void +start_cpu(void *arg) { - DBG("V(slave_boot_base)=%p P(slave_boot_base)=%p MP_BOOT=%p sz=0x%x\n", - slave_boot_base, - kvtophys((vm_offset_t) slave_boot_base), - MP_BOOT, - slave_boot_end-slave_boot_base); + int i = 1000; + processor_start_info_t *psip = (processor_start_info_t *) arg; - /* - * Copy the boot entry code to the real-mode vector area MP_BOOT. - * This is in page 1 which has been reserved for this purpose by - * machine_startup() from the boot processor. - * The slave boot code is responsible for switching to protected - * mode and then jumping to the common startup, _start(). - */ - bcopy_phys(kvtophys((vm_offset_t) slave_boot_base), - (addr64_t) MP_BOOT, - slave_boot_end-slave_boot_base); + /* Ignore this if the current processor is not the starter */ + if (cpu_number() != psip->starter_cpu) + return; + + i386_start_cpu(psip->target_lapic, psip->target_cpu); + +#ifdef POSTCODE_DELAY + /* Wait much longer if postcodes are displayed for a delay period. */ + i *= 10000; +#endif + mp_wait_for_cpu_up(psip->target_cpu, i*100, 100); + if (TSC_sync_margin && + cpu_datap(psip->target_cpu)->cpu_running) { + /* + * Compare the TSC from the started processor with ours. + * Report and log/panic if it diverges by more than + * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin + * can be overriden by boot-arg (with 0 meaning no checking). + */ + uint64_t tsc_starter; + int64_t tsc_delta; + atomic_decl(&tsc_entry_barrier, 1); + while (tsc_entry_barrier != 0) + ; /* spin for both processors at barrier */ + tsc_starter = rdtsc64(); + atomic_decl(&tsc_exit_barrier, 1); + while (tsc_exit_barrier != 0) + ; /* spin for target to store its TSC */ + tsc_delta = tsc_target - tsc_starter; + kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n", + psip->target_cpu, tsc_target, tsc_delta, tsc_delta); + if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) { +#if DEBUG + panic( +#else + printf( +#endif + "Unsynchronized TSC for cpu %d: " + "0x%016llx, delta 0x%llx\n", + psip->target_cpu, tsc_target, tsc_delta); + } + } +} + +kern_return_t +intel_startCPU( + int slot_num) +{ + int lapic = cpu_to_lapic[slot_num]; + boolean_t istate; + + assert(lapic != -1); + + DBGLOG_CPU_INIT(slot_num); + + DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic); + DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD); /* - * Zero a stack area above the boot code. + * Initialize (or re-initialize) the descriptor tables for this cpu. + * Propagate processor mode to slave. 
*/ - DBG("bzero_phys 0x%x sz 0x%x\n",MP_BOOTSTACK+MP_BOOT-0x400, 0x400); - bzero_phys((addr64_t)MP_BOOTSTACK+MP_BOOT-0x400, 0x400); + if (cpu_mode_is64bit()) + cpu_desc_init64(cpu_datap(slot_num)); + else + cpu_desc_init(cpu_datap(slot_num)); + + /* Serialize use of the slave boot stack, etc. */ + lck_mtx_lock(&mp_cpu_boot_lock); + + istate = ml_set_interrupts_enabled(FALSE); + if (slot_num == get_cpu_number()) { + ml_set_interrupts_enabled(istate); + lck_mtx_unlock(&mp_cpu_boot_lock); + return KERN_SUCCESS; + } + + start_info.starter_cpu = cpu_number(); + start_info.target_cpu = slot_num; + start_info.target_lapic = lapic; + tsc_entry_barrier = 2; + tsc_exit_barrier = 2; /* - * Set the location at the base of the stack to point to the - * common startup entry. + * Perform the processor startup sequence with all running + * processors rendezvous'ed. This is required during periods when + * the cache-disable bit is set for MTRR/PAT initialization. */ - DBG("writing 0x%x at phys 0x%x\n", - kvtophys((vm_offset_t) &slave_pstart), MP_MACH_START+MP_BOOT); - ml_phys_write_word(MP_MACH_START+MP_BOOT, - (unsigned int)kvtophys((vm_offset_t) &slave_pstart)); - - /* Flush caches */ - __asm__("wbinvd"); + mp_rendezvous_no_intrs(start_cpu, (void *) &start_info); + + start_info.target_cpu = 0; + + ml_set_interrupts_enabled(istate); + lck_mtx_unlock(&mp_cpu_boot_lock); + + if (!cpu_datap(slot_num)->cpu_running) { + kprintf("Failed to start CPU %02d\n", slot_num); + printf("Failed to start CPU %02d, rebooting...\n", slot_num); + delay(1000000); + halt_cpu(); + return KERN_SUCCESS; + } else { + kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic); + return KERN_SUCCESS; + } } #if MP_DEBUG @@ -372,20 +457,19 @@ cpu_signal_handler(x86_saved_state_t *regs) { int my_cpu; volatile int *my_word; -#if MACH_KDB && MACH_ASSERT - int i=100; -#endif /* MACH_KDB && MACH_ASSERT */ - mp_disable_preemption(); + SCHED_STATS_IPI(current_processor()); my_cpu = cpu_number(); - my_word = ¤t_cpu_datap()->cpu_signals; + my_word = &cpu_data_ptr[my_cpu]->cpu_signals; + /* Store the initial set of signals for diagnostics. New + * signals could arrive while these are being processed + * so it's no more than a hint. + */ + + cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word; do { -#if MACH_KDB && MACH_ASSERT - if (i-- <= 0) - Debugger("cpu_signal_handler: signals did not clear"); -#endif /* MACH_KDB && MACH_ASSERT */ #if MACH_KDP if (i_bit(MP_KDP, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_KDP); @@ -394,10 +478,13 @@ cpu_signal_handler(x86_saved_state_t *regs) * current thread's stack (if any) is synchronized with the * context at the moment of the interrupt, to facilitate * access through the debugger. - * XXX 64-bit state? 
*/ - sync_iss_to_iks(saved_state32(regs)); - mp_kdp_wait(TRUE); + sync_iss_to_iks(regs); + if (pmsafe_debug && !kdp_snapshot) + pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); + mp_kdp_wait(TRUE, FALSE); + if (pmsafe_debug && !kdp_snapshot) + pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); } else #endif /* MACH_KDP */ if (i_bit(MP_TLB_FLUSH, my_word)) { @@ -408,14 +495,6 @@ cpu_signal_handler(x86_saved_state_t *regs) DBGLOG(cpu_handle,my_cpu,MP_AST); i_bit_clear(MP_AST, my_word); ast_check(cpu_to_processor(my_cpu)); -#if MACH_KDB - } else if (i_bit(MP_KDB, my_word)) { - - i_bit_clear(MP_KDB, my_word); - current_cpu_datap()->cpu_kdb_is_slave++; - mp_kdb_wait(); - current_cpu_datap()->cpu_kdb_is_slave--; -#endif /* MACH_KDB */ } else if (i_bit(MP_RENDEZVOUS, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS); i_bit_clear(MP_RENDEZVOUS, my_word); @@ -432,11 +511,13 @@ cpu_signal_handler(x86_saved_state_t *regs) DBGLOG(cpu_handle,my_cpu,MP_CALL); i_bit_clear(MP_CALL, my_word); mp_cpus_call_action(); + } else if (i_bit(MP_CALL_PM, my_word)) { + DBGLOG(cpu_handle,my_cpu,MP_CALL_PM); + i_bit_clear(MP_CALL_PM, my_word); + mp_call_PM(); } } while (*my_word); - mp_enable_preemption(); - return 0; } @@ -444,75 +525,47 @@ static int NMIInterruptHandler(x86_saved_state_t *regs) { void *stackptr; - + + if (panic_active() && !panicDebugging) { + if (pmsafe_debug) + pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); + for(;;) + cpu_pause(); + } + + atomic_incl(&NMIPI_acks, 1); sync_iss_to_iks_unconditionally(regs); +#if defined (__i386__) __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); +#elif defined (__x86_64__) + __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); +#endif if (cpu_number() == debugger_cpu) goto NMExit; - if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) { + if (spinlock_timed_out) { + char pstr[192]; + snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu); + panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs); + } else if (pmap_tlb_flush_timeout == TRUE) { char pstr[128]; - snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number()); - panic_i386_backtrace(stackptr, 10, &pstr[0], TRUE, regs); - panic_io_port_read(); - mca_check_save(); - if (pmsafe_debug) - pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); - for(;;) { - cpu_pause(); - } + snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid); + panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs); } - mp_kdp_wait(FALSE); + +#if MACH_KDP + if (pmsafe_debug && !kdp_snapshot) + pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); + current_cpu_datap()->cpu_NMI_acknowledged = TRUE; + mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active()); + if (pmsafe_debug && !kdp_snapshot) + pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); +#endif NMExit: return 1; } -#ifdef MP_DEBUG -int max_lock_loops = 1000000; -int trappedalready = 0; /* (BRINGUP */ -#endif /* MP_DEBUG */ - -static void -i386_cpu_IPI(int cpu) -{ - boolean_t state; - -#ifdef MP_DEBUG - if(cpu_datap(cpu)->cpu_signals & 6) { /* (BRINGUP) */ - kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu 
%d\n", cpu_datap(cpu)->cpu_signals, cpu); - } -#endif /* MP_DEBUG */ - -#if MACH_KDB -#ifdef MP_DEBUG - if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) { /* (BRINGUP) */ - if(kdb_cpu != cpu_number()) { - trappedalready = 1; - panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n", - cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu); - } - } -#endif /* MP_DEBUG */ -#endif - - /* Wait for previous interrupt to be delivered... */ -#ifdef MP_DEBUG - int pending_busy_count = 0; - while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) { - if (++pending_busy_count > max_lock_loops) - panic("i386_cpu_IPI() deadlock\n"); -#else - while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) { -#endif /* MP_DEBUG */ - cpu_pause(); - } - - state = ml_set_interrupts_enabled(FALSE); - LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT); - LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED); - (void) ml_set_interrupts_enabled(state); -} /* * cpu_interrupt is really just to be used by the scheduler to @@ -522,10 +575,15 @@ i386_cpu_IPI(int cpu) void cpu_interrupt(int cpu) { + boolean_t did_IPI = FALSE; + if (smp_initialized && pmCPUExitIdle(cpu_datap(cpu))) { i386_cpu_IPI(cpu); + did_IPI = TRUE; } + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0); } /* @@ -534,20 +592,41 @@ cpu_interrupt(int cpu) void cpu_NMI_interrupt(int cpu) { - boolean_t state; - if (smp_initialized) { - state = ml_set_interrupts_enabled(FALSE); -/* Program the interrupt command register */ - LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT); -/* The vector is ignored in this case--the target CPU will enter on the - * NMI vector. - */ - LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI); - (void) ml_set_interrupts_enabled(state); + i386_send_NMI(cpu); + } +} + +static void (* volatile mp_PM_func)(void) = NULL; + +static void +mp_call_PM(void) +{ + assert(!ml_get_interrupts_enabled()); + + if (mp_PM_func != NULL) + mp_PM_func(); +} + +void +cpu_PM_interrupt(int cpu) +{ + assert(!ml_get_interrupts_enabled()); + + if (mp_PM_func != NULL) { + if (cpu == cpu_number()) + mp_PM_func(); + else + i386_signal_cpu(cpu, MP_CALL_PM, ASYNC); } } +void +PM_interrupt_register(void (*fn)(void)) +{ + mp_PM_func = fn; +} + void i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) { @@ -559,7 +638,7 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) return; if (event == MP_TLB_FLUSH) - KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0); + KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0); DBGLOG(cpu_signal, cpu, event); @@ -578,7 +657,7 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) } } if (event == MP_TLB_FLUSH) - KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0); + KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0); } /* @@ -591,7 +670,7 @@ i386_signal_cpus(mp_event_t event, mp_sync_t mode) unsigned int cpu; unsigned int my_cpu = cpu_number(); - assert(hw_lock_held(&x86_topo_lock)); + assert(hw_lock_held((hw_lock_t)&x86_topo_lock)); for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) @@ -610,7 +689,7 @@ i386_active_cpus(void) unsigned int cpu; unsigned int ncpus = 0; - assert(hw_lock_held(&x86_topo_lock)); + assert(hw_lock_held((hw_lock_t)&x86_topo_lock)); for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_datap(cpu)->cpu_running) @@ -652,9 +731,11 @@ mp_rendezvous_action(void) handle_pending_TLB_flushes(); 
cpu_pause(); } + /* action function */ if (mp_rv_action_func != NULL) mp_rv_action_func(mp_rv_func_arg); + /* spin on exit rendezvous */ atomic_incl(&mp_rv_exit, 1); while (mp_rv_exit < mp_rv_ncpus) { @@ -771,38 +852,175 @@ mp_rendezvous_no_intrs( arg); } -void -handle_pending_TLB_flushes(void) + +typedef struct { + queue_chain_t link; /* queue linkage */ + void (*func)(void *,void *); /* routine to call */ + void *arg0; /* routine's 1st arg */ + void *arg1; /* routine's 2nd arg */ + volatile long *countp; /* completion counter */ +} mp_call_t; + + +typedef struct { + queue_head_t queue; + decl_simple_lock_data(, lock); +} mp_call_queue_t; +#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS +static mp_call_queue_t mp_cpus_call_freelist; +static mp_call_queue_t mp_cpus_call_head[MAX_CPUS]; + +static inline boolean_t +mp_call_head_lock(mp_call_queue_t *cqp) +{ + boolean_t intrs_enabled; + + intrs_enabled = ml_set_interrupts_enabled(FALSE); + simple_lock(&cqp->lock); + + return intrs_enabled; +} + +static inline boolean_t +mp_call_head_is_locked(mp_call_queue_t *cqp) +{ + return !ml_get_interrupts_enabled() && + hw_lock_held((hw_lock_t)&cqp->lock); +} + +static inline void +mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled) +{ + simple_unlock(&cqp->lock); + ml_set_interrupts_enabled(intrs_enabled); +} + +static inline mp_call_t * +mp_call_alloc(void) +{ + mp_call_t *callp = NULL; + boolean_t intrs_enabled; + mp_call_queue_t *cqp = &mp_cpus_call_freelist; + + intrs_enabled = mp_call_head_lock(cqp); + if (!queue_empty(&cqp->queue)) + queue_remove_first(&cqp->queue, callp, typeof(callp), link); + mp_call_head_unlock(cqp, intrs_enabled); + + return callp; +} + +static inline void +mp_call_free(mp_call_t *callp) +{ + boolean_t intrs_enabled; + mp_call_queue_t *cqp = &mp_cpus_call_freelist; + + intrs_enabled = mp_call_head_lock(cqp); + queue_enter_first(&cqp->queue, callp, typeof(callp), link); + mp_call_head_unlock(cqp, intrs_enabled); +} + +static inline mp_call_t * +mp_call_dequeue_locked(mp_call_queue_t *cqp) { - volatile int *my_word = ¤t_cpu_datap()->cpu_signals; + mp_call_t *callp = NULL; - if (i_bit(MP_TLB_FLUSH, my_word)) { - DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH); - i_bit_clear(MP_TLB_FLUSH, my_word); - pmap_update_interrupt(); + assert(mp_call_head_is_locked(cqp)); + if (!queue_empty(&cqp->queue)) + queue_remove_first(&cqp->queue, callp, typeof(callp), link); + return callp; +} + +static inline void +mp_call_enqueue_locked( + mp_call_queue_t *cqp, + mp_call_t *callp) +{ + queue_enter(&cqp->queue, callp, typeof(callp), link); +} + +/* Called on the boot processor to initialize global structures */ +static void +mp_cpus_call_init(void) +{ + mp_call_queue_t *cqp = &mp_cpus_call_freelist; + + DBG("mp_cpus_call_init()\n"); + simple_lock_init(&cqp->lock, 0); + queue_init(&cqp->queue); +} + +/* + * Called by each processor to add call buffers to the free list + * and to initialize the per-cpu call queue. + * Also called but ignored on slave processors on re-start/wake. 
+ */ +static void +mp_cpus_call_cpu_init(void) +{ + int i; + mp_call_queue_t *cqp = &mp_cpus_call_head[cpu_number()]; + mp_call_t *callp; + + if (cqp->queue.next != NULL) + return; /* restart/wake case: called already */ + + simple_lock_init(&cqp->lock, 0); + queue_init(&cqp->queue); + for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) { + callp = (mp_call_t *) kalloc(sizeof(mp_call_t)); + mp_call_free(callp); } + + DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number()); } /* * This is called from cpu_signal_handler() to process an MP_CALL signal. + * And also from i386_deactivate_cpu() when a cpu is being taken offline. */ static void mp_cpus_call_action(void) { - if (mp_rv_action_func != NULL) - mp_rv_action_func(mp_rv_func_arg); - atomic_incl(&mp_rv_complete, 1); + mp_call_queue_t *cqp; + boolean_t intrs_enabled; + mp_call_t *callp; + mp_call_t call; + + assert(!ml_get_interrupts_enabled()); + cqp = &mp_cpus_call_head[cpu_number()]; + intrs_enabled = mp_call_head_lock(cqp); + while ((callp = mp_call_dequeue_locked(cqp)) != NULL) { + /* Copy call request to the stack to free buffer */ + call = *callp; + mp_call_free(callp); + if (call.func != NULL) { + mp_call_head_unlock(cqp, intrs_enabled); + KERNEL_DEBUG_CONSTANT( + TRACE_MP_CPUS_CALL_ACTION, + call.func, call.arg0, call.arg1, call.countp, 0); + call.func(call.arg0, call.arg1); + (void) mp_call_head_lock(cqp); + } + if (call.countp != NULL) + atomic_incl(call.countp, 1); + } + mp_call_head_unlock(cqp, intrs_enabled); } /* * mp_cpus_call() runs a given function on cpus specified in a given cpu mask. - * If the mode is SYNC, the function is called serially on the target cpus - * in logical cpu order. If the mode is ASYNC, the function is called in - * parallel over the specified cpus. + * Possible modes are: + * SYNC: function is called serially on target cpus in logical cpu order + * waiting for each call to be acknowledged before proceeding + * ASYNC: function call is queued to the specified cpus + * waiting for all calls to complete in parallel before returning + * NOSYNC: function calls are queued + * but we return before confirmation of calls completing. * The action function may be NULL. * The cpu mask may include the local cpu. Offline cpus are ignored. - * Return does not occur until the function has completed on all cpus. - * The return value is the number of cpus on which the function was called. + * The return value is the number of cpus on which the call was made or queued. 
*/ cpu_t mp_cpus_call( @@ -810,32 +1028,77 @@ mp_cpus_call( mp_sync_t mode, void (*action_func)(void *), void *arg) +{ + return mp_cpus_call1( + cpus, + mode, + (void (*)(void *,void *))action_func, + arg, + NULL, + NULL, + NULL); +} + +static void +mp_cpus_call_wait(boolean_t intrs_enabled, + long mp_cpus_signals, + volatile long *mp_cpus_calls) +{ + mp_call_queue_t *cqp; + + cqp = &mp_cpus_call_head[cpu_number()]; + + while (*mp_cpus_calls < mp_cpus_signals) { + if (!intrs_enabled) { + /* Sniffing w/o locking */ + if (!queue_empty(&cqp->queue)) + mp_cpus_call_action(); + handle_pending_TLB_flushes(); + } + cpu_pause(); + } +} + +cpu_t +mp_cpus_call1( + cpumask_t cpus, + mp_sync_t mode, + void (*action_func)(void *, void *), + void *arg0, + void *arg1, + cpumask_t *cpus_calledp, + cpumask_t *cpus_notcalledp) { cpu_t cpu; - boolean_t intrs_enabled = ml_get_interrupts_enabled(); + boolean_t intrs_enabled = FALSE; boolean_t call_self = FALSE; + cpumask_t cpus_called = 0; + cpumask_t cpus_notcalled = 0; + long mp_cpus_signals = 0; + volatile long mp_cpus_calls = 0; + + KERNEL_DEBUG_CONSTANT( + TRACE_MP_CPUS_CALL | DBG_FUNC_START, + cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1); if (!smp_initialized) { if ((cpus & CPUMASK_SELF) == 0) - return 0; + goto out; if (action_func != NULL) { - (void) ml_set_interrupts_enabled(FALSE); - action_func(arg); + intrs_enabled = ml_set_interrupts_enabled(FALSE); + action_func(arg0, arg1); ml_set_interrupts_enabled(intrs_enabled); } - return 1; + call_self = TRUE; + goto out; } - - /* obtain rendezvous lock */ - simple_lock(&mp_rv_lock); - /* Use the rendezvous data structures for this call */ - mp_rv_action_func = action_func; - mp_rv_func_arg = arg; - mp_rv_ncpus = 0; - mp_rv_complete = 0; - - simple_lock(&x86_topo_lock); + /* + * Queue the call for each non-local requested cpu. + * The topo lock is not taken. Instead we sniff the cpu_running state + * and then re-check it after taking the call lock. A cpu being taken + * offline runs the action function after clearing the cpu_running. + */ for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) { if (((cpu_to_cpumask(cpu) & cpus) == 0) || !cpu_datap(cpu)->cpu_running) @@ -846,61 +1109,104 @@ mp_cpus_call( * we defer our call until we have signalled all others. */ call_self = TRUE; + cpus_called |= cpu_to_cpumask(cpu); if (mode == SYNC && action_func != NULL) { - (void) ml_set_interrupts_enabled(FALSE); - action_func(arg); - ml_set_interrupts_enabled(intrs_enabled); + KERNEL_DEBUG_CONSTANT( + TRACE_MP_CPUS_CALL_LOCAL, + VM_KERNEL_UNSLIDE(action_func), + arg0, arg1, 0, 0); + action_func(arg0, arg1); } } else { /* - * Bump count of other cpus called and signal this cpu. - * Note: we signal asynchronously regardless of mode - * because we wait on mp_rv_complete either here - * (if mode == SYNC) or later (if mode == ASYNC). - * While spinning, poll for TLB flushes if interrupts - * are disabled. + * Here to queue a call to cpu and IPI. + * Spinning for request buffer unless NOSYNC. 
*/ - mp_rv_ncpus++; - i386_signal_cpu(cpu, MP_CALL, ASYNC); - if (mode == SYNC) { - simple_unlock(&x86_topo_lock); - while (mp_rv_complete < mp_rv_ncpus) { - if (!intrs_enabled) + mp_call_t *callp = NULL; + mp_call_queue_t *cqp = &mp_cpus_call_head[cpu]; + + queue_call: + if (callp == NULL) + callp = mp_call_alloc(); + intrs_enabled = mp_call_head_lock(cqp); + if (!cpu_datap(cpu)->cpu_running) { + mp_call_head_unlock(cqp, intrs_enabled); + continue; + } + if (mode == NOSYNC) { + if (callp == NULL) { + cpus_notcalled |= cpu_to_cpumask(cpu); + mp_call_head_unlock(cqp, intrs_enabled); + KERNEL_DEBUG_CONSTANT( + TRACE_MP_CPUS_CALL_NOBUF, + cpu, 0, 0, 0, 0); + continue; + } + callp->countp = NULL; + } else { + if (callp == NULL) { + mp_call_head_unlock(cqp, intrs_enabled); + KERNEL_DEBUG_CONSTANT( + TRACE_MP_CPUS_CALL_NOBUF, + cpu, 0, 0, 0, 0); + if (!intrs_enabled) { + /* Sniffing w/o locking */ + if (!queue_empty(&cqp->queue)) + mp_cpus_call_action(); handle_pending_TLB_flushes(); + } cpu_pause(); + goto queue_call; } - simple_lock(&x86_topo_lock); + callp->countp = &mp_cpus_calls; + } + callp->func = action_func; + callp->arg0 = arg0; + callp->arg1 = arg1; + mp_call_enqueue_locked(cqp, callp); + mp_cpus_signals++; + cpus_called |= cpu_to_cpumask(cpu); + i386_signal_cpu(cpu, MP_CALL, ASYNC); + mp_call_head_unlock(cqp, intrs_enabled); + if (mode == SYNC) { + mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls); } } } - simple_unlock(&x86_topo_lock); - /* - * If calls are being made asynchronously, - * make the local call now if needed, and then - * wait for all other cpus to finish their calls. - */ - if (mode == ASYNC) { - if (call_self && action_func != NULL) { - (void) ml_set_interrupts_enabled(FALSE); - action_func(arg); + /* Call locally if mode not SYNC */ + if (mode != SYNC && call_self ) { + KERNEL_DEBUG_CONSTANT( + TRACE_MP_CPUS_CALL_LOCAL, + VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0); + if (action_func != NULL) { + ml_set_interrupts_enabled(FALSE); + action_func(arg0, arg1); ml_set_interrupts_enabled(intrs_enabled); } - while (mp_rv_complete < mp_rv_ncpus) { - if (!intrs_enabled) - handle_pending_TLB_flushes(); - cpu_pause(); - } } - - /* Determine the number of cpus called */ - cpu = mp_rv_ncpus + (call_self ? 1 : 0); - simple_unlock(&mp_rv_lock); + /* For ASYNC, now wait for all signaled cpus to complete their calls */ + if (mode == ASYNC) { + mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls); + } + +out: + cpu = (cpu_t) mp_cpus_signals + (call_self ? 
1 : 0); + + if (cpus_calledp) + *cpus_calledp = cpus_called; + if (cpus_notcalledp) + *cpus_notcalledp = cpus_notcalled; + + KERNEL_DEBUG_CONSTANT( + TRACE_MP_CPUS_CALL | DBG_FUNC_END, + cpu, cpus_called, cpus_notcalled, 0, 0); return cpu; } + static void mp_broadcast_action(void) { @@ -909,8 +1215,8 @@ mp_broadcast_action(void) mp_bc_action_func(mp_bc_func_arg); /* if we're the last one through, wake up the instigator */ - if (atomic_decl_and_test((volatile long *)&mp_bc_count, 1)) - thread_wakeup(((event_t)(unsigned int *) &mp_bc_count)); + if (atomic_decl_and_test(&mp_bc_count, 1)) + thread_wakeup(((event_t)(uintptr_t) &mp_bc_count)); } /* @@ -930,13 +1236,13 @@ mp_broadcast( } /* obtain broadcast lock */ - mutex_lock(&mp_bc_lock); + lck_mtx_lock(&mp_bc_lock); /* set static function pointers */ mp_bc_action_func = action_func; mp_bc_func_arg = arg; - assert_wait(&mp_bc_count, THREAD_UNINT); + assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT); /* * signal other processors, which will call mp_broadcast_action() @@ -957,7 +1263,7 @@ mp_broadcast( clear_wait(current_thread(), THREAD_AWAKENED); /* release lock */ - mutex_unlock(&mp_bc_lock); + lck_mtx_unlock(&mp_bc_lock); } void @@ -974,9 +1280,13 @@ i386_activate_cpu(void) simple_lock(&x86_topo_lock); cdp->cpu_running = TRUE; + started_cpu(); simple_unlock(&x86_topo_lock); + flush_tlb_raw(); } +extern void etimer_timer_expire(void *arg); + void i386_deactivate_cpu(void) { @@ -988,6 +1298,10 @@ i386_deactivate_cpu(void) cdp->cpu_running = FALSE; simple_unlock(&x86_topo_lock); + timer_queue_shutdown(&cdp->rtclock_timer.queue); + cdp->rtclock_timer.deadline = EndOfAllTime; + mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL); + /* * In case a rendezvous/braodcast/call was initiated to this cpu * before we cleared cpu_running, we must perform any actions due. @@ -1013,7 +1327,7 @@ void mp_kdp_enter(void) { unsigned int cpu; - unsigned int ncpus; + unsigned int ncpus = 0; unsigned int my_cpu; uint64_t tsc_timeout; @@ -1025,21 +1339,33 @@ mp_kdp_enter(void) * stopping others. */ mp_kdp_state = ml_set_interrupts_enabled(FALSE); + my_cpu = cpu_number(); + + if (my_cpu == (unsigned) debugger_cpu) { + kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n"); + kdp_reset(); + return; + } + + cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time(); simple_lock(&mp_kdp_lock); - if (pmsafe_debug) + if (pmsafe_debug && !kdp_snapshot) pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); while (mp_kdp_trap) { simple_unlock(&mp_kdp_lock); DBG("mp_kdp_enter() race lost\n"); - mp_kdp_wait(TRUE); +#if MACH_KDP + mp_kdp_wait(TRUE, FALSE); +#endif simple_lock(&mp_kdp_lock); } - my_cpu = cpu_number(); debugger_cpu = my_cpu; + ncpus = 1; mp_kdp_ncpus = 1; /* self */ mp_kdp_trap = TRUE; + debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time; simple_unlock(&mp_kdp_lock); /* @@ -1047,7 +1373,7 @@ mp_kdp_enter(void) */ DBG("mp_kdp_enter() signaling other processors\n"); if (force_immediate_debugger_NMI == FALSE) { - for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { + for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; ncpus++; @@ -1064,7 +1390,10 @@ mp_kdp_enter(void) * "unsafe-to-interrupt" points such as the trampolines, * but neither do we want to lose state by waiting too long. 
*/ - tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000); + tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL); + + if (virtualized) + tsc_timeout = ~0ULL; while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) { /* @@ -1096,7 +1425,7 @@ mp_kdp_enter(void) } DBG("mp_kdp_enter() %u processors done %s\n", - mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out"); + (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out"); postcode(MP_KDP_ENTER); } @@ -1111,23 +1440,53 @@ cpu_signal_pending(int cpu, mp_event_t event) retval = TRUE; return retval; } - + +long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func, + void *arg0, void *arg1) +{ + if (lcpu > (real_ncpus - 1)) + return -1; + + if (func == NULL) + return -1; + + kdp_xcpu_call_func.func = func; + kdp_xcpu_call_func.ret = -1; + kdp_xcpu_call_func.arg0 = arg0; + kdp_xcpu_call_func.arg1 = arg1; + kdp_xcpu_call_func.cpu = lcpu; + DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu); + while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE) + cpu_pause(); + return kdp_xcpu_call_func.ret; +} + +static void +kdp_x86_xcpu_poll(void) +{ + if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) { + kdp_xcpu_call_func.ret = + kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0, + kdp_xcpu_call_func.arg1, + cpu_number()); + kdp_xcpu_call_func.cpu = KDP_XCPU_NONE; + } +} static void -mp_kdp_wait(boolean_t flush) +mp_kdp_wait(boolean_t flush, boolean_t isNMI) { DBG("mp_kdp_wait()\n"); /* If an I/O port has been specified as a debugging aid, issue a read */ panic_io_port_read(); +#if CONFIG_MCA /* If we've trapped due to a machine-check, save MCA registers */ mca_check_save(); - - if (pmsafe_debug) - pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); +#endif atomic_incl((volatile long *)&mp_kdp_ncpus, 1); - while (mp_kdp_trap) { + while (mp_kdp_trap || (isNMI == TRUE)) { /* * A TLB shootdown request may be pending--this would result * in the requesting processor waiting in PMAP_UPDATE_TLBS() @@ -1136,12 +1495,11 @@ mp_kdp_wait(boolean_t flush) */ if (flush) handle_pending_TLB_flushes(); + + kdp_x86_xcpu_poll(); cpu_pause(); } - if (pmsafe_debug) - pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); - atomic_decl((volatile long *)&mp_kdp_ncpus, 1); DBG("mp_kdp_wait() done\n"); } @@ -1152,6 +1510,9 @@ mp_kdp_exit(void) DBG("mp_kdp_exit()\n"); debugger_cpu = -1; atomic_decl((volatile long *)&mp_kdp_ncpus, 1); + + debugger_exit_time = mach_absolute_time(); + mp_kdp_trap = FALSE; __asm__ volatile("mfence"); @@ -1168,15 +1529,24 @@ mp_kdp_exit(void) cpu_pause(); } - if (pmsafe_debug) + if (pmsafe_debug && !kdp_snapshot) pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); + debugger_exit_time = mach_absolute_time(); + DBG("mp_kdp_exit() done\n"); (void) ml_set_interrupts_enabled(mp_kdp_state); postcode(0); } #endif /* MACH_KDP */ +boolean_t +mp_recent_debugger_activity() { + uint64_t abstime = mach_absolute_time(); + return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) || + ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance)); +} + /*ARGSUSED*/ void init_ast_check( @@ -1188,249 +1558,149 @@ void cause_ast_check( processor_t processor) { - int cpu = PROCESSOR_DATA(processor, slot_num); + int cpu = processor->cpu_id; if (cpu != cpu_number()) { i386_signal_cpu(cpu, MP_AST, ASYNC); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0); } } -#if MACH_KDB -/* - * invoke kdb on slave processors - */ - void -remote_kdb(void) +slave_machine_init(void *param) { - unsigned int 
my_cpu = cpu_number(); - unsigned int cpu; - int kdb_ncpus; - uint64_t tsc_timeout = 0; - - mp_kdb_trap = TRUE; - mp_kdb_ncpus = 1; - for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { - if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) - continue; - kdb_ncpus++; - i386_signal_cpu(cpu, MP_KDB, ASYNC); - } - DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus); - - tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000); + /* + * Here in process context, but with interrupts disabled. + */ + DBG("slave_machine_init() CPU%d\n", get_cpu_number()); - while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) { - /* - * a TLB shootdown request may be pending... this would result in the requesting - * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. - * Process it, so it can now enter mp_kdp_wait() + if (param == FULL_SLAVE_INIT) { + /* + * Cold start */ - handle_pending_TLB_flushes(); - - cpu_pause(); + clock_init(); + cpu_machine_init(); /* Interrupts enabled hereafter */ + mp_cpus_call_cpu_init(); } - DBG("mp_kdp_enter() %d processors done %s\n", - mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out"); } -static void -mp_kdb_wait(void) +#undef cpu_number +int cpu_number(void) { - DBG("mp_kdb_wait()\n"); - - /* If an I/O port has been specified as a debugging aid, issue a read */ - panic_io_port_read(); - - atomic_incl(&mp_kdb_ncpus, 1); - while (mp_kdb_trap) { - /* - * a TLB shootdown request may be pending... this would result in the requesting - * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. - * Process it, so it can now enter mp_kdp_wait() - */ - handle_pending_TLB_flushes(); - - cpu_pause(); - } - atomic_decl((volatile long *)&mp_kdb_ncpus, 1); - DBG("mp_kdb_wait() done\n"); + return get_cpu_number(); } -/* - * Clear kdb interrupt - */ - -void -clear_kdb_intr(void) +static void +cpu_prewarm_init() { - mp_disable_preemption(); - i_bit_clear(MP_KDB, ¤t_cpu_datap()->cpu_signals); - mp_enable_preemption(); + int i; + + simple_lock_init(&cpu_warm_lock, 0); + queue_init(&cpu_warm_call_list); + for (i = 0; i < NUM_CPU_WARM_CALLS; i++) { + enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]); + } } -void -mp_kdb_exit(void) +static timer_call_t +grab_warm_timer_call() { - DBG("mp_kdb_exit()\n"); - atomic_decl((volatile long *)&mp_kdb_ncpus, 1); - mp_kdb_trap = FALSE; - __asm__ volatile("mfence"); - - while (mp_kdb_ncpus > 0) { - /* - * a TLB shootdown request may be pending... this would result in the requesting - * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. 
- * Process it, so it can now enter mp_kdp_wait() - */ - handle_pending_TLB_flushes(); + spl_t x; + timer_call_t call = NULL; - cpu_pause(); + x = splsched(); + simple_lock(&cpu_warm_lock); + if (!queue_empty(&cpu_warm_call_list)) { + call = (timer_call_t) dequeue_head(&cpu_warm_call_list); } + simple_unlock(&cpu_warm_lock); + splx(x); - DBG("mp_kdb_exit() done\n"); + return call; } -#endif /* MACH_KDB */ - static void -do_init_slave(boolean_t fast_restart) +free_warm_timer_call(timer_call_t call) { - void *init_param = FULL_SLAVE_INIT; - - postcode(I386_INIT_SLAVE); - - if (!fast_restart) { - /* Ensure that caching and write-through are enabled */ - set_cr0(get_cr0() & ~(CR0_NW|CR0_CD)); - - DBG("i386_init_slave() CPU%d: phys (%d) active.\n", - get_cpu_number(), get_cpu_phys_number()); - - assert(!ml_get_interrupts_enabled()); + spl_t x; - cpu_mode_init(current_cpu_datap()); - - mca_cpu_init(); - - lapic_configure(); - LAPIC_DUMP(); - LAPIC_CPU_MAP_DUMP(); - - init_fpu(); - - mtrr_update_cpu(); - } else - init_param = FAST_SLAVE_INIT; - - /* resume VT operation */ - vmx_resume(); - - if (!fast_restart) - pat_init(); - - cpu_thread_init(); /* not strictly necessary */ - - cpu_init(); /* Sets cpu_running which starter cpu waits for */ - - slave_main(init_param); - - panic("do_init_slave() returned from slave_main()"); + x = splsched(); + simple_lock(&cpu_warm_lock); + enqueue_head(&cpu_warm_call_list, (queue_entry_t)call); + simple_unlock(&cpu_warm_lock); + splx(x); } /* - * i386_init_slave() is called from pstart. - * We're in the cpu's interrupt stack with interrupts disabled. - * At this point we are in legacy mode. We need to switch on IA32e - * if the mode is set to 64-bits. + * Runs in timer call context (interrupts disabled). */ -void -i386_init_slave(void) +static void +cpu_warm_timer_call_func( + call_entry_param_t p0, + __unused call_entry_param_t p1) { - do_init_slave(FALSE); + free_warm_timer_call((timer_call_t)p0); + return; } /* - * i386_init_slave_fast() is called from pmCPUHalt. - * We're running on the idle thread and need to fix up - * some accounting and get it so that the scheduler sees this - * CPU again. + * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0). */ -void -i386_init_slave_fast(void) -{ - do_init_slave(TRUE); -} - -void -slave_machine_init(void *param) +static void +_cpu_warm_setup( + void *arg) { - /* - * Here in process context, but with interrupts disabled. - */ - DBG("slave_machine_init() CPU%d\n", get_cpu_number()); + cpu_warm_data_t cwdp = (cpu_warm_data_t)arg; - if (param == FULL_SLAVE_INIT) { - /* - * Cold start - */ - clock_init(); + timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL); + cwdp->cwd_result = 0; - cpu_machine_init(); /* Interrupts enabled hereafter */ - } + return; } -#undef cpu_number() -int cpu_number(void) +/* + * Not safe to call with interrupts disabled. + */ +kern_return_t +ml_interrupt_prewarm( + uint64_t deadline) { - return get_cpu_number(); -} - -#if MACH_KDB -#include - -#define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */ + struct cpu_warm_data cwd; + timer_call_t call; + cpu_t ct; + if (ml_get_interrupts_enabled() == FALSE) { + panic("%s: Interrupts disabled?\n", __FUNCTION__); + } -#if TRAP_DEBUG -#define MTRAPS 100 -struct mp_trap_hist_struct { - unsigned char type; - unsigned char data[5]; -} trap_hist[MTRAPS], *cur_trap_hist = trap_hist, - *max_trap_hist = &trap_hist[MTRAPS]; + /* + * If the platform doesn't need our help, say that we succeeded. 
+ */ + if (!ml_get_interrupt_prewake_applicable()) { + return KERN_SUCCESS; + } -void db_trap_hist(void); + /* + * Grab a timer call to use. + */ + call = grab_warm_timer_call(); + if (call == NULL) { + return KERN_RESOURCE_SHORTAGE; + } -/* - * SPL: - * 1: new spl - * 2: old spl - * 3: new tpr - * 4: old tpr - * INT: - * 1: int vec - * 2: old spl - * 3: new spl - * 4: post eoi tpr - * 5: exit tpr - */ + timer_call_setup(call, cpu_warm_timer_call_func, call); + cwd.cwd_call = call; + cwd.cwd_deadline = deadline; + cwd.cwd_result = 0; -void -db_trap_hist(void) -{ - int i,j; - for(i=0;i=cur_trap_hist)?"*":" ", - (trap_hist[i].type == 1)?"SPL":"INT"); - for(j=0;j<5;j++) - db_printf(" %02x", trap_hist[i].data[j]); - db_printf("\n"); - } - + /* + * For now, non-local interrupts happen on the master processor. + */ + ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd); + if (ct == 0) { + free_warm_timer_call(call); + return KERN_FAILURE; + } else { + return cwd.cwd_result; + } } -#endif /* TRAP_DEBUG */ -#endif /* MACH_KDB */ -
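For reference, a minimal usage sketch of the mp_cpus_call1() interface introduced in this diff: it queues a two-argument callback onto the target CPUs' per-cpu call queues, IPIs them, and in SYNC mode waits for each call to be acknowledged. Everything below other than mp_cpus_call1(), cpu_to_cpumask(), SYNC and master_cpu is an illustrative assumption (the callback, its cookie, and the header set), not code taken from the diff.

/* Headers as assumed for a standalone sketch; the exact set depends on build context. */
#include <i386/mp.h>                    /* mp_cpus_call1(), cpumask_t, SYNC */
#include <pexpert/pexpert.h>            /* kprintf() (declaration location assumed) */

/* Hypothetical callback: runs on each signalled CPU with interrupts disabled. */
static void
example_poke_cpu(void *arg0, __unused void *arg1)
{
	volatile uint32_t *flag = (volatile uint32_t *)arg0;

	*flag = 1;                      /* record that this CPU ran the call */
}

/* Ask the master CPU to run the callback and wait for it to complete. */
static void
example_call_master(void)
{
	static uint32_t ran = 0;
	cpumask_t	cpus_called = 0;
	cpumask_t	cpus_notcalled = 0;
	cpu_t		ncalled;

	ncalled = mp_cpus_call1(cpu_to_cpumask(master_cpu),
				SYNC,			/* wait for acknowledgement */
				example_poke_cpu,
				(void *) &ran,		/* arg0: cookie for the callback */
				NULL,			/* arg1: unused here */
				&cpus_called,
				&cpus_notcalled);

	kprintf("example: %d cpu(s) called, mask 0x%x, skipped 0x%x, ran=%u\n",
		ncalled, cpus_called, cpus_notcalled, ran);
}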
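Likewise, a hedged sketch of a caller of the new ml_interrupt_prewarm() path added at the end of this diff. It must be invoked with interrupts enabled; the routine grabs a warm timer call and then uses mp_cpus_call() to have the master CPU arm it for the given deadline. The 100 microsecond horizon, the function name, and the header choices below are assumptions for illustration only.

#include <mach/kern_return.h>
#include <mach/mach_time.h>             /* mach_absolute_time() */
#include <mach/clock_types.h>           /* NSEC_PER_USEC */
#include <kern/clock.h>                 /* nanoseconds_to_absolutetime() */
#include <i386/machine_routines.h>      /* ml_interrupt_prewarm() (prototype location assumed) */

/*
 * Warm the master CPU's timer hardware for a deadline roughly 100 microseconds
 * out.  Per the code above: returns KERN_SUCCESS if the platform needs no
 * prewarm or the timer was armed, KERN_RESOURCE_SHORTAGE if no warm timer call
 * buffer is free, and KERN_FAILURE if the master CPU could not be reached.
 */
static kern_return_t
example_prewarm_soon(void)
{
	uint64_t	interval;
	uint64_t	deadline;

	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &interval);
	deadline = mach_absolute_time() + interval;

	return ml_interrupt_prewarm(deadline);	/* panics if interrupts are disabled */
}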