X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/8f6c56a50524aa785f7e596d52dddfb331e18961..cf7d32b81c573a0536dc4da4157f9c26f8d0bed3:/osfmk/i386/mp.c diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 8fe553a7b..fb7afc4d0 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -57,16 +58,36 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include #include #include +#include +#include +#include +#include +#include + +#include +#include + +#include +#if MACH_KDB +#include +#include +#include +#include +#include +#include +#include +#include +#endif #if MP_DEBUG #define PAUSE delay(1000000) @@ -76,24 +97,6 @@ #define PAUSE #endif /* MP_DEBUG */ -/* - * By default, use high vectors to leave vector space for systems - * with multiple I/O APIC's. However some systems that boot with - * local APIC disabled will hang in SMM when vectors greater than - * 0x5F are used. Those systems are not expected to have I/O APIC - * so 16 (0x50 - 0x40) vectors for legacy PIC support is perfect. 
- */ -#define LAPIC_DEFAULT_INTERRUPT_BASE 0xD0 -#define LAPIC_REDUCED_INTERRUPT_BASE 0x50 -/* - * Specific lapic interrupts are relative to this base: - */ -#define LAPIC_PERFCNT_INTERRUPT 0xB -#define LAPIC_TIMER_INTERRUPT 0xC -#define LAPIC_SPURIOUS_INTERRUPT 0xD -#define LAPIC_INTERPROCESSOR_INTERRUPT 0xE -#define LAPIC_ERROR_INTERRUPT 0xF - /* Initialize lapic_id so cpu_number() works on non SMP systems */ unsigned long lapic_id_initdata = 0; unsigned long lapic_id = (unsigned long)&lapic_id_initdata; @@ -101,6 +104,7 @@ vm_offset_t lapic_start; static i386_intr_func_t lapic_timer_func; static i386_intr_func_t lapic_pmi_func; +static i386_intr_func_t lapic_thermal_func; /* TRUE if local APIC was enabled by the OS not by the BIOS */ static boolean_t lapic_os_enabled = FALSE; @@ -110,23 +114,47 @@ int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE; void slave_boot_init(void); -static void mp_kdp_wait(void); +#if MACH_KDB +static void mp_kdb_wait(void); +volatile boolean_t mp_kdb_trap = FALSE; +volatile long mp_kdb_ncpus = 0; +#endif + +static void mp_kdp_wait(boolean_t flush); static void mp_rendezvous_action(void); +static void mp_broadcast_action(void); + +static int NMIInterruptHandler(x86_saved_state_t *regs); +static boolean_t cpu_signal_pending(int cpu, mp_event_t event); +static void cpu_NMI_interrupt(int cpu); boolean_t smp_initialized = FALSE; +boolean_t force_immediate_debugger_NMI = FALSE; decl_simple_lock_data(,mp_kdp_lock); decl_mutex_data(static, mp_cpu_boot_lock); /* Variables needed for MP rendezvous. 
*/ +decl_simple_lock_data(,mp_rv_lock); static void (*mp_rv_setup_func)(void *arg); static void (*mp_rv_action_func)(void *arg); static void (*mp_rv_teardown_func)(void *arg); static void *mp_rv_func_arg; static int mp_rv_ncpus; -static long mp_rv_waiters[2]; -decl_simple_lock_data(,mp_rv_lock); + /* Cache-aligned barriers: */ +static volatile long mp_rv_entry __attribute__((aligned(64))); +static volatile long mp_rv_exit __attribute__((aligned(64))); +static volatile long mp_rv_complete __attribute__((aligned(64))); + +/* Variables needed for MP broadcast. */ +static void (*mp_bc_action_func)(void *arg); +static void *mp_bc_func_arg; +static int mp_bc_ncpus; +static volatile long mp_bc_count; +decl_mutex_data(static, mp_bc_lock); + +static void mp_cpus_call_action(void); int lapic_to_cpu[MAX_CPUS]; int cpu_to_lapic[MAX_CPUS]; @@ -149,6 +177,24 @@ lapic_cpu_map(int apic_id, int cpu) lapic_to_cpu[apic_id] = cpu; } +/* + * Retrieve the local apic ID a cpu. + * + * Returns the local apic ID for the given processor. + * If the processor does not exist or apic not configured, returns -1. + */ + +uint32_t +ml_get_apicid(uint32_t cpu) +{ + if(cpu >= (uint32_t)MAX_CPUS) + return 0xFFFFFFFF; /* Return -1 if cpu too big */ + + /* Return the apic ID (or -1 if not configured) */ + return (uint32_t)cpu_to_lapic[cpu]; + +} + #ifdef MP_DEBUG static void lapic_cpu_map_dump(void) @@ -175,18 +221,6 @@ lapic_cpu_map_dump(void) #define LAPIC_DUMP() #endif /* MP_DEBUG */ -#define LAPIC_REG(reg) \ - (*((volatile int *)(lapic_start + LAPIC_##reg))) -#define LAPIC_REG_OFFSET(reg,off) \ - (*((volatile int *)(lapic_start + LAPIC_##reg + (off)))) - -#define LAPIC_VECTOR(src) \ - (lapic_interrupt_base + LAPIC_##src##_INTERRUPT) - -#define LAPIC_ISR_IS_SET(base,src) \ - (LAPIC_REG_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) & \ - (1 <<((base + LAPIC_##src##_INTERRUPT)%32))) - #if GPROF /* * Initialize dummy structs for profiling. 
These aren't used but @@ -206,8 +240,6 @@ struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars }; #define GPROF_INIT() #endif /* GPROF */ -extern void master_up(void); - void smp_init(void) { @@ -222,6 +254,7 @@ smp_init(void) simple_lock_init(&mp_kdp_lock, 0); simple_lock_init(&mp_rv_lock, 0); mutex_init(&mp_cpu_boot_lock, 0); + mutex_init(&mp_bc_lock, 0); console_init(); /* Local APIC? */ @@ -241,17 +274,22 @@ smp_init(void) /* Establish a map to the local apic */ lapic_start = vm_map_min(kernel_map); - result = vm_map_find_space(kernel_map, &lapic_start, - round_page(LAPIC_SIZE), 0, &entry); + result = vm_map_find_space(kernel_map, + (vm_map_address_t *) &lapic_start, + round_page(LAPIC_SIZE), 0, + VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry); if (result != KERN_SUCCESS) { panic("smp_init: vm_map_find_entry FAILED (err=%d)", result); } vm_map_unlock(kernel_map); +/* Map in the local APIC non-cacheable, as recommended by Intel + * in section 8.4.1 of the "System Programming Guide". 
+ */ pmap_enter(pmap_kernel(), lapic_start, (ppnum_t) i386_btop(lapic_base), - VM_PROT_READ|VM_PROT_WRITE, - VM_WIMG_USE_DEFAULT, + VM_PROT_READ|VM_PROT_WRITE, + VM_WIMG_IO, TRUE); lapic_id = (unsigned long)(lapic_start + LAPIC_ID); @@ -262,19 +300,16 @@ smp_init(void) /* Set up the lapic_id <-> cpu_number map and add this boot processor */ lapic_cpu_map_init(); lapic_cpu_map((LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0); + kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]); lapic_init(); cpu_thread_init(); - if (pmc_init() != KERN_SUCCESS) - printf("Performance counters not available\n"); - GPROF_INIT(); DBGLOG_CPU_INIT(master_cpu); slave_boot_init(); - master_up(); smp_initialized = TRUE; @@ -342,6 +377,11 @@ lapic_dump(void) DM[(LAPIC_REG(LVT_PERFCNT)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK], (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle", BOOL(LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_MASKED)); + kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n", + LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_VECTOR_MASK, + DM[(LAPIC_REG(LVT_THERMAL)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK], + (LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle", + BOOL(LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_MASKED)); kprintf("LVT_LINT0: Vector 0x%02x [%s][%s][%s] %s %cmasked\n", LAPIC_REG(LVT_LINT0)&LAPIC_LVT_VECTOR_MASK, DM[(LAPIC_REG(LVT_LINT0)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK], @@ -379,6 +419,26 @@ lapic_dump(void) kprintf("\n"); } +#if MACH_KDB +/* + * Displays apic junk + * + * da + */ +void +db_apic(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + + lapic_dump(); + + return; +} + +#endif + boolean_t lapic_probe(void) { @@ -403,7 +463,7 @@ lapic_probe(void) /* * Re-initialize cpu features info and re-check. 
*/ - set_cpu_model(); + cpuid_set_info(); if (cpuid_features() & CPUID_FEATURE_APIC) { printf("Local APIC discovered and enabled\n"); lapic_os_enabled = TRUE; @@ -451,7 +511,7 @@ lapic_shutdown(void) rdmsr(MSR_IA32_APIC_BASE, lo, hi); lo &= ~MSR_IA32_APIC_BASE_ENABLE; wrmsr(MSR_IA32_APIC_BASE, lo, hi); - set_cpu_model(); + cpuid_set_info(); mp_enable_preemption(); } @@ -484,10 +544,12 @@ lapic_init(void) /* Perfmon: unmasked */ LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT); + /* Thermal: unmasked */ + LAPIC_REG(LVT_THERMAL) = LAPIC_VECTOR(THERMAL); + lapic_esr_clear(); LAPIC_REG(LVT_ERROR) = LAPIC_VECTOR(ERROR); - } void @@ -545,6 +607,12 @@ lapic_set_pmi_func(i386_intr_func_t func) lapic_pmi_func = func; } +void +lapic_set_thermal_func(i386_intr_func_t func) +{ + lapic_thermal_func = func; +} + static inline void _lapic_end_of_interrupt(void) { @@ -558,42 +626,77 @@ lapic_end_of_interrupt(void) } int -lapic_interrupt(int interrupt, void *state) +lapic_interrupt(int interrupt, x86_saved_state_t *state) { + int retval = 0; + + /* Did we just field an interruption for the HPET comparator? */ + if(x86_core()->HpetVec == ((uint32_t)interrupt - 0x40)) { + /* Yes, go handle it... */ + retval = HPETInterrupt(); + /* Was it really handled? 
*/ + if(retval) { + /* If so, EOI the 'rupt */ + _lapic_end_of_interrupt(); + /* + * and then leave, + * indicating that this has been handled + */ + return 1; + } + } + interrupt -= lapic_interrupt_base; - if (interrupt < 0) - return 0; + if (interrupt < 0) { + if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base)) { + retval = NMIInterruptHandler(state); + _lapic_end_of_interrupt(); + return retval; + } + else + return 0; + } switch(interrupt) { case LAPIC_PERFCNT_INTERRUPT: if (lapic_pmi_func != NULL) - (*lapic_pmi_func)( - (struct i386_interrupt_state *) state); + (*lapic_pmi_func)(NULL); /* Clear interrupt masked */ LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT); _lapic_end_of_interrupt(); - return 1; + retval = 1; + break; case LAPIC_TIMER_INTERRUPT: _lapic_end_of_interrupt(); if (lapic_timer_func != NULL) - (*lapic_timer_func)( - (struct i386_interrupt_state *) state); - return 1; + (*lapic_timer_func)(state); + retval = 1; + break; + case LAPIC_THERMAL_INTERRUPT: + if (lapic_thermal_func != NULL) + (*lapic_thermal_func)(NULL); + _lapic_end_of_interrupt(); + retval = 1; + break; case LAPIC_ERROR_INTERRUPT: lapic_dump(); panic("Local APIC error\n"); _lapic_end_of_interrupt(); - return 1; + retval = 1; + break; case LAPIC_SPURIOUS_INTERRUPT: kprintf("SPIV\n"); /* No EOI required here */ - return 1; + retval = 1; + break; case LAPIC_INTERPROCESSOR_INTERRUPT: - cpu_signal_handler((struct i386_interrupt_state *) state); _lapic_end_of_interrupt(); - return 1; + cpu_signal_handler(state); + retval = 1; + break; } - return 0; + + return retval; } void @@ -644,8 +747,14 @@ intel_startCPU( DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic); DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD); - /* Initialize (or re-initialize) the descriptor tables for this cpu. */ - mp_desc_init(cpu_datap(slot_num), FALSE); + /* + * Initialize (or re-initialize) the descriptor tables for this cpu. + * Propagate processor mode to slave. 
+ */ + if (cpu_mode_is64bit()) + cpu_desc_init64(cpu_datap(slot_num), FALSE); + else + cpu_desc_init(cpu_datap(slot_num), FALSE); /* Serialize use of the slave boot stack. */ mutex_lock(&mp_cpu_boot_lock); @@ -683,21 +792,20 @@ intel_startCPU( mutex_unlock(&mp_cpu_boot_lock); if (!cpu_datap(slot_num)->cpu_running) { - DBG("Failed to start CPU %02d\n", slot_num); + kprintf("Failed to start CPU %02d\n", slot_num); printf("Failed to start CPU %02d, rebooting...\n", slot_num); delay(1000000); cpu_shutdown(); return KERN_SUCCESS; } else { - DBG("Started CPU %02d\n", slot_num); - printf("Started CPU %02d\n", slot_num); + kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic); return KERN_SUCCESS; } } extern char slave_boot_base[]; extern char slave_boot_end[]; -extern void pstart(void); +extern void slave_pstart(void); void slave_boot_init(void) @@ -715,7 +823,7 @@ slave_boot_init(void) * The slave boot code is responsible for switching to protected * mode and then jumping to the common startup, _start(). */ - bcopy_phys((addr64_t) kvtophys((vm_offset_t) slave_boot_base), + bcopy_phys(kvtophys((vm_offset_t) slave_boot_base), (addr64_t) MP_BOOT, slave_boot_end-slave_boot_base); @@ -730,9 +838,9 @@ slave_boot_init(void) * common startup entry. 
*/ DBG("writing 0x%x at phys 0x%x\n", - kvtophys((vm_offset_t) &pstart), MP_MACH_START+MP_BOOT); + kvtophys((vm_offset_t) &slave_pstart), MP_MACH_START+MP_BOOT); ml_phys_write_word(MP_MACH_START+MP_BOOT, - kvtophys((vm_offset_t) &pstart)); + (unsigned int)kvtophys((vm_offset_t) &slave_pstart)); /* Flush caches */ __asm__("wbinvd"); @@ -747,7 +855,7 @@ MP_EVENT_NAME_DECL(); #endif /* MP_DEBUG */ void -cpu_signal_handler(__unused struct i386_interrupt_state *regs) +cpu_signal_handler(x86_saved_state_t *regs) { int my_cpu; volatile int *my_word; @@ -763,13 +871,20 @@ cpu_signal_handler(__unused struct i386_interrupt_state *regs) do { #if MACH_KDB && MACH_ASSERT if (i-- <= 0) - Debugger("cpu_signal_handler"); + Debugger("cpu_signal_handler: signals did not clear"); #endif /* MACH_KDB && MACH_ASSERT */ #if MACH_KDP if (i_bit(MP_KDP, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_KDP); i_bit_clear(MP_KDP, my_word); - mp_kdp_wait(); +/* Ensure that the i386_kernel_state at the base of the + * current thread's stack (if any) is synchronized with the + * context at the moment of the interrupt, to facilitate + * access through the debugger. + * XXX 64-bit state? 
+ */ + sync_iss_to_iks(saved_state32(regs)); + mp_kdp_wait(TRUE); } else #endif /* MACH_KDP */ if (i_bit(MP_TLB_FLUSH, my_word)) { @@ -782,16 +897,28 @@ cpu_signal_handler(__unused struct i386_interrupt_state *regs) ast_check(cpu_to_processor(my_cpu)); #if MACH_KDB } else if (i_bit(MP_KDB, my_word)) { - extern kdb_is_slave[]; i_bit_clear(MP_KDB, my_word); - kdb_is_slave[my_cpu]++; - kdb_kintr(); + current_cpu_datap()->cpu_kdb_is_slave++; + mp_kdb_wait(); + current_cpu_datap()->cpu_kdb_is_slave--; #endif /* MACH_KDB */ } else if (i_bit(MP_RENDEZVOUS, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS); i_bit_clear(MP_RENDEZVOUS, my_word); mp_rendezvous_action(); + } else if (i_bit(MP_BROADCAST, my_word)) { + DBGLOG(cpu_handle,my_cpu,MP_BROADCAST); + i_bit_clear(MP_BROADCAST, my_word); + mp_broadcast_action(); + } else if (i_bit(MP_CHUD, my_word)) { + DBGLOG(cpu_handle,my_cpu,MP_CHUD); + i_bit_clear(MP_CHUD, my_word); + chudxnu_cpu_signal_handler(); + } else if (i_bit(MP_CALL, my_word)) { + DBGLOG(cpu_handle,my_cpu,MP_CALL); + i_bit_clear(MP_CALL, my_word); + mp_cpus_call_action(); } } while (*my_word); @@ -799,36 +926,100 @@ cpu_signal_handler(__unused struct i386_interrupt_state *regs) } +/* We want this to show up in backtraces, hence marked noinline. 
+ */ +static int __attribute__((noinline)) +NMIInterruptHandler(x86_saved_state_t *regs) +{ + boolean_t state = ml_set_interrupts_enabled(FALSE); + sync_iss_to_iks_unconditionally(regs); + mp_kdp_wait(FALSE); + (void) ml_set_interrupts_enabled(state); + return 1; +} + #ifdef MP_DEBUG extern int max_lock_loops; +int trappedalready = 0; /* (BRINGUP */ #endif /* MP_DEBUG */ -void -cpu_interrupt(int cpu) + +static void +i386_cpu_IPI(int cpu) { boolean_t state; + +#ifdef MP_DEBUG + if(cpu_datap(cpu)->cpu_signals & 6) { /* (BRINGUP) */ + kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu); + } +#endif /* MP_DEBUG */ - if (smp_initialized) { +#if MACH_KDB +#ifdef MP_DEBUG + if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) { /* (BRINGUP) */ + if(kdb_cpu != cpu_number()) { + trappedalready = 1; + panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n", + cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu); + } + } +#endif /* MP_DEBUG */ +#endif - /* Wait for previous interrupt to be delivered... */ + /* Wait for previous interrupt to be delivered... 
*/ #ifdef MP_DEBUG - int pending_busy_count = 0; - while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) { - if (++pending_busy_count > max_lock_loops) - panic("cpus_interrupt() deadlock\n"); + int pending_busy_count = 0; + while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) { + if (++pending_busy_count > max_lock_loops) + panic("i386_cpu_IPI() deadlock\n"); #else - while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) { + while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) { #endif /* MP_DEBUG */ - cpu_pause(); - } + cpu_pause(); + } + state = ml_set_interrupts_enabled(FALSE); + LAPIC_REG(ICRD) = + cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT; + LAPIC_REG(ICR) = + LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED; + (void) ml_set_interrupts_enabled(state); +} + +/* + * cpu_interrupt is really just to be used by the scheduler to + * get a CPU's attention it may not always issue an IPI. If an + * IPI is always needed then use i386_cpu_IPI. + */ +void +cpu_interrupt(int cpu) +{ + if (smp_initialized + && pmCPUExitIdle(cpu_datap(cpu))) { + i386_cpu_IPI(cpu); + } +} + +/* + * Send a true NMI via the local APIC to the specified CPU. + */ +static void +cpu_NMI_interrupt(int cpu) +{ + boolean_t state; + + if (smp_initialized) { state = ml_set_interrupts_enabled(FALSE); +/* Program the interrupt command register */ LAPIC_REG(ICRD) = cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT; +/* The vector is ignored in this case--the target CPU will enter on the + * NMI vector. 
+ */ LAPIC_REG(ICR) = - LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED; + LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_NMI; (void) ml_set_interrupts_enabled(state); } - } void @@ -836,15 +1027,18 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) { volatile int *signals = &cpu_datap(cpu)->cpu_signals; uint64_t tsc_timeout; - + if (!cpu_datap(cpu)->cpu_running) return; - DBGLOG(cpu_signal, cpu, event); + if (event == MP_TLB_FLUSH) + KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0); + DBGLOG(cpu_signal, cpu, event); + i_bit_set(event, signals); - cpu_interrupt(cpu); + i386_cpu_IPI(cpu); if (mode == SYNC) { again: tsc_timeout = rdtsc64() + (1000*1000*1000); @@ -857,14 +1051,22 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) goto again; } } + if (event == MP_TLB_FLUSH) + KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0); } +/* + * Send event to all running cpus. + * Called with the topology locked. + */ void i386_signal_cpus(mp_event_t event, mp_sync_t mode) { unsigned int cpu; unsigned int my_cpu = cpu_number(); + assert(hw_lock_held(&x86_topo_lock)); + for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; @@ -872,12 +1074,18 @@ i386_signal_cpus(mp_event_t event, mp_sync_t mode) } } +/* + * Return the number of running cpus. + * Called with the topology locked. 
+ */ int i386_active_cpus(void) { unsigned int cpu; unsigned int ncpus = 0; + assert(hw_lock_held(&x86_topo_lock)); + for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_datap(cpu)->cpu_running) ncpus++; @@ -902,24 +1110,39 @@ i386_active_cpus(void) static void mp_rendezvous_action(void) { + boolean_t intrs_enabled; /* setup function */ if (mp_rv_setup_func != NULL) mp_rv_setup_func(mp_rv_func_arg); + + intrs_enabled = ml_get_interrupts_enabled(); + /* spin on entry rendezvous */ - atomic_incl(&mp_rv_waiters[0], 1); - while (*((volatile long *) &mp_rv_waiters[0]) < mp_rv_ncpus) + atomic_incl(&mp_rv_entry, 1); + while (mp_rv_entry < mp_rv_ncpus) { + /* poll for pesky tlb flushes if interrupts disabled */ + if (!intrs_enabled) + handle_pending_TLB_flushes(); cpu_pause(); + } /* action function */ if (mp_rv_action_func != NULL) mp_rv_action_func(mp_rv_func_arg); /* spin on exit rendezvous */ - atomic_incl(&mp_rv_waiters[1], 1); - while (*((volatile long *) &mp_rv_waiters[1]) < mp_rv_ncpus) + atomic_incl(&mp_rv_exit, 1); + while (mp_rv_exit < mp_rv_ncpus) { + if (!intrs_enabled) + handle_pending_TLB_flushes(); cpu_pause(); + } + /* teardown function */ if (mp_rv_teardown_func != NULL) mp_rv_teardown_func(mp_rv_func_arg); + + /* Bump completion count */ + atomic_incl(&mp_rv_complete, 1); } void @@ -948,26 +1171,315 @@ mp_rendezvous(void (*setup_func)(void *), mp_rv_teardown_func = teardown_func; mp_rv_func_arg = arg; - mp_rv_waiters[0] = 0; /* entry rendezvous count */ - mp_rv_waiters[1] = 0; /* exit rendezvous count */ - mp_rv_ncpus = i386_active_cpus(); + mp_rv_entry = 0; + mp_rv_exit = 0; + mp_rv_complete = 0; /* * signal other processors, which will call mp_rendezvous_action() * with interrupts disabled */ + simple_lock(&x86_topo_lock); + mp_rv_ncpus = i386_active_cpus(); i386_signal_cpus(MP_RENDEZVOUS, ASYNC); + simple_unlock(&x86_topo_lock); /* call executor function on this cpu */ mp_rendezvous_action(); + /* + * Spin for everyone to complete. 
+ * This is necessary to ensure that all processors have proceeded + * from the exit barrier before we release the rendezvous structure. + */ + while (mp_rv_complete < mp_rv_ncpus) { + cpu_pause(); + } + + /* Tidy up */ + mp_rv_setup_func = NULL; + mp_rv_action_func = NULL; + mp_rv_teardown_func = NULL; + mp_rv_func_arg = NULL; + /* release lock */ simple_unlock(&mp_rv_lock); } +void +mp_rendezvous_break_lock(void) +{ + simple_lock_init(&mp_rv_lock, 0); +} + +static void +setup_disable_intrs(__unused void * param_not_used) +{ + /* disable interrupts before the first barrier */ + boolean_t intr = ml_set_interrupts_enabled(FALSE); + + current_cpu_datap()->cpu_iflag = intr; + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); +} + +static void +teardown_restore_intrs(__unused void * param_not_used) +{ + /* restore interrupt flag following MTRR changes */ + ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag); + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); +} + +/* + * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled. + * This is exported for use by kexts. + */ +void +mp_rendezvous_no_intrs( + void (*action_func)(void *), + void *arg) +{ + mp_rendezvous(setup_disable_intrs, + action_func, + teardown_restore_intrs, + arg); +} + +void +handle_pending_TLB_flushes(void) +{ + volatile int *my_word = &current_cpu_datap()->cpu_signals; + + if (i_bit(MP_TLB_FLUSH, my_word)) { + DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH); + i_bit_clear(MP_TLB_FLUSH, my_word); + pmap_update_interrupt(); + } +} + +/* + * This is called from cpu_signal_handler() to process an MP_CALL signal. + */ +static void +mp_cpus_call_action(void) +{ + if (mp_rv_action_func != NULL) + mp_rv_action_func(mp_rv_func_arg); + atomic_incl(&mp_rv_complete, 1); +} + +/* + * mp_cpus_call() runs a given function on cpus specified in a given cpu mask. + * If the mode is SYNC, the function is called serially on the target cpus + * in logical cpu order. 
If the mode is ASYNC, the function is called in + * parallel over the specified cpus. + * The action function may be NULL. + * The cpu mask may include the local cpu. Offline cpus are ignored. + * Return does not occur until the function has completed on all cpus. + * The return value is the number of cpus on which the function was called. + */ +cpu_t +mp_cpus_call( + cpumask_t cpus, + mp_sync_t mode, + void (*action_func)(void *), + void *arg) +{ + cpu_t cpu; + boolean_t intrs_enabled = ml_get_interrupts_enabled(); + boolean_t call_self = FALSE; + + if (!smp_initialized) { + if ((cpus & CPUMASK_SELF) == 0) + return 0; + if (action_func != NULL) { + (void) ml_set_interrupts_enabled(FALSE); + action_func(arg); + ml_set_interrupts_enabled(intrs_enabled); + } + return 1; + } + + /* obtain rendezvous lock */ + simple_lock(&mp_rv_lock); + + /* Use the rendezvous data structures for this call */ + mp_rv_action_func = action_func; + mp_rv_func_arg = arg; + mp_rv_ncpus = 0; + mp_rv_complete = 0; + + simple_lock(&x86_topo_lock); + for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) { + if (((cpu_to_cpumask(cpu) & cpus) == 0) || + !cpu_datap(cpu)->cpu_running) + continue; + if (cpu == (cpu_t) cpu_number()) { + /* + * We don't IPI ourself and if calling asynchronously, + * we defer our call until we have signalled all others. + */ + call_self = TRUE; + if (mode == SYNC && action_func != NULL) { + (void) ml_set_interrupts_enabled(FALSE); + action_func(arg); + ml_set_interrupts_enabled(intrs_enabled); + } + } else { + /* + * Bump count of other cpus called and signal this cpu. + * Note: we signal asynchronously regardless of mode + * because we wait on mp_rv_complete either here + * (if mode == SYNC) or later (if mode == ASYNC). + * While spinning, poll for TLB flushes if interrupts + * are disabled. 
+ */ + mp_rv_ncpus++; + i386_signal_cpu(cpu, MP_CALL, ASYNC); + if (mode == SYNC) { + simple_unlock(&x86_topo_lock); + while (mp_rv_complete < mp_rv_ncpus) { + if (!intrs_enabled) + handle_pending_TLB_flushes(); + cpu_pause(); + } + simple_lock(&x86_topo_lock); + } + } + } + simple_unlock(&x86_topo_lock); + + /* + * If calls are being made asynchronously, + * make the local call now if needed, and then + * wait for all other cpus to finish their calls. + */ + if (mode == ASYNC) { + if (call_self && action_func != NULL) { + (void) ml_set_interrupts_enabled(FALSE); + action_func(arg); + ml_set_interrupts_enabled(intrs_enabled); + } + while (mp_rv_complete < mp_rv_ncpus) { + if (!intrs_enabled) + handle_pending_TLB_flushes(); + cpu_pause(); + } + } + + /* Determine the number of cpus called */ + cpu = mp_rv_ncpus + (call_self ? 1 : 0); + + simple_unlock(&mp_rv_lock); + + return cpu; +} + +static void +mp_broadcast_action(void) +{ + /* call action function */ + if (mp_bc_action_func != NULL) + mp_bc_action_func(mp_bc_func_arg); + + /* if we're the last one through, wake up the instigator */ + if (atomic_decl_and_test((volatile long *)&mp_bc_count, 1)) + thread_wakeup(((event_t)(unsigned int *) &mp_bc_count)); +} + +/* + * mp_broadcast() runs a given function on all active cpus. + * The caller blocks until the functions has run on all cpus. + * The caller will also block if there is another pending braodcast. 
+ */ +void +mp_broadcast( + void (*action_func)(void *), + void *arg) +{ + if (!smp_initialized) { + if (action_func != NULL) + action_func(arg); + return; + } + + /* obtain broadcast lock */ + mutex_lock(&mp_bc_lock); + + /* set static function pointers */ + mp_bc_action_func = action_func; + mp_bc_func_arg = arg; + + assert_wait(&mp_bc_count, THREAD_UNINT); + + /* + * signal other processors, which will call mp_broadcast_action() + */ + simple_lock(&x86_topo_lock); + mp_bc_ncpus = i386_active_cpus(); /* total including this cpu */ + mp_bc_count = mp_bc_ncpus; + i386_signal_cpus(MP_BROADCAST, ASYNC); + + /* call executor function on this cpu */ + mp_broadcast_action(); + simple_unlock(&x86_topo_lock); + + /* block for all cpus to have run action_func */ + if (mp_bc_ncpus > 1) + thread_block(THREAD_CONTINUE_NULL); + else + clear_wait(current_thread(), THREAD_AWAKENED); + + /* release lock */ + mutex_unlock(&mp_bc_lock); +} + +void +i386_activate_cpu(void) +{ + cpu_data_t *cdp = current_cpu_datap(); + + assert(!ml_get_interrupts_enabled()); + + if (!smp_initialized) { + cdp->cpu_running = TRUE; + return; + } + + simple_lock(&x86_topo_lock); + cdp->cpu_running = TRUE; + simple_unlock(&x86_topo_lock); +} + +void +i386_deactivate_cpu(void) +{ + cpu_data_t *cdp = current_cpu_datap(); + + assert(!ml_get_interrupts_enabled()); + + simple_lock(&x86_topo_lock); + cdp->cpu_running = FALSE; + simple_unlock(&x86_topo_lock); + + /* + * In case a rendezvous/braodcast/call was initiated to this cpu + * before we cleared cpu_running, we must perform any actions due. 
+ */ + if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals)) + mp_rendezvous_action(); + if (i_bit(MP_BROADCAST, &cdp->cpu_signals)) + mp_broadcast_action(); + if (i_bit(MP_CALL, &cdp->cpu_signals)) + mp_cpus_call_action(); + cdp->cpu_signals = 0; /* all clear */ +} + +int pmsafe_debug = 1; + #if MACH_KDP volatile boolean_t mp_kdp_trap = FALSE; -long mp_kdp_ncpus; +volatile unsigned long mp_kdp_ncpus; boolean_t mp_kdp_state; @@ -988,65 +1500,148 @@ mp_kdp_enter(void) */ mp_kdp_state = ml_set_interrupts_enabled(FALSE); simple_lock(&mp_kdp_lock); + + if (pmsafe_debug) + pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); + while (mp_kdp_trap) { simple_unlock(&mp_kdp_lock); DBG("mp_kdp_enter() race lost\n"); - mp_kdp_wait(); + mp_kdp_wait(TRUE); simple_lock(&mp_kdp_lock); } mp_kdp_ncpus = 1; /* self */ mp_kdp_trap = TRUE; simple_unlock(&mp_kdp_lock); - /* Deliver a nudge to other cpus, counting how many */ + /* + * Deliver a nudge to other cpus, counting how many + */ DBG("mp_kdp_enter() signaling other processors\n"); - for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { - if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) - continue; - ncpus++; - i386_signal_cpu(cpu, MP_KDP, ASYNC); - } + if (force_immediate_debugger_NMI == FALSE) { + for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) + continue; + ncpus++; + i386_signal_cpu(cpu, MP_KDP, ASYNC); + } + /* + * Wait other processors to synchronize + */ + DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus); - /* Wait other processors to spin. 
*/ - DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus); - tsc_timeout = rdtsc64() + (1000*1000*1000); - while (*((volatile unsigned int *) &mp_kdp_ncpus) != ncpus - && rdtsc64() < tsc_timeout) { - cpu_pause(); + /* + * This timeout is rather arbitrary; we don't want to NMI + * processors that are executing at potentially + * "unsafe-to-interrupt" points such as the trampolines, + * but neither do we want to lose state by waiting too long. + */ + tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000); + + while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) { + /* + * A TLB shootdown request may be pending--this would + * result in the requesting processor waiting in + * PMAP_UPDATE_TLBS() until this processor deals with it. + * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + cpu_pause(); + } + /* If we've timed out, and some processor(s) are still unresponsive, + * interrupt them with an NMI via the local APIC. + */ + if (mp_kdp_ncpus != ncpus) { + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) + continue; + if (cpu_signal_pending(cpu, MP_KDP)) + cpu_NMI_interrupt(cpu); + } + } } - DBG("mp_kdp_enter() %d processors done %s\n", - mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out"); + else + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) + continue; + cpu_NMI_interrupt(cpu); + } + + DBG("mp_kdp_enter() %u processors done %s\n", + mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? 
"OK" : "timed out"); + postcode(MP_KDP_ENTER); } -static void -mp_kdp_wait(void) +static boolean_t +cpu_signal_pending(int cpu, mp_event_t event) { - boolean_t state; + volatile int *signals = &cpu_datap(cpu)->cpu_signals; + boolean_t retval = FALSE; + + if (i_bit(event, signals)) + retval = TRUE; + return retval; +} + - state = ml_set_interrupts_enabled(TRUE); +static void +mp_kdp_wait(boolean_t flush) +{ DBG("mp_kdp_wait()\n"); - atomic_incl(&mp_kdp_ncpus, 1); + /* If an I/O port has been specified as a debugging aid, issue a read */ + panic_io_port_read(); + + /* If we've trapped due to a machine-check, save MCA registers */ + mca_check_save(); + + if (pmsafe_debug) + pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); + + atomic_incl((volatile long *)&mp_kdp_ncpus, 1); while (mp_kdp_trap) { + /* + * A TLB shootdown request may be pending--this would result + * in the requesting processor waiting in PMAP_UPDATE_TLBS() + * until this processor handles it. + * Process it, so it can now enter mp_kdp_wait() + */ + if (flush) + handle_pending_TLB_flushes(); cpu_pause(); } - atomic_decl(&mp_kdp_ncpus, 1); + + if (pmsafe_debug) + pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); + + atomic_decl((volatile long *)&mp_kdp_ncpus, 1); DBG("mp_kdp_wait() done\n"); - (void) ml_set_interrupts_enabled(state); } void mp_kdp_exit(void) { DBG("mp_kdp_exit()\n"); - atomic_decl(&mp_kdp_ncpus, 1); + atomic_decl((volatile long *)&mp_kdp_ncpus, 1); mp_kdp_trap = FALSE; + __asm__ volatile("mfence"); /* Wait other processors to stop spinning. XXX needs timeout */ DBG("mp_kdp_exit() waiting for processors to resume\n"); - while (*((volatile long *) &mp_kdp_ncpus) > 0) { + while (mp_kdp_ncpus > 0) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. 
+ * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + cpu_pause(); } + + if (pmsafe_debug) + pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); + DBG("mp_kdp_exit() done\n"); (void) ml_set_interrupts_enabled(mp_kdp_state); postcode(0); @@ -1071,6 +1666,7 @@ cause_ast_check( } } +#if MACH_KDB /* * invoke kdb on slave processors */ @@ -1080,14 +1676,56 @@ remote_kdb(void) { unsigned int my_cpu = cpu_number(); unsigned int cpu; + int kdb_ncpus; + uint64_t tsc_timeout = 0; - mp_disable_preemption(); - for (cpu = 0; cpu < real_ncpus; cpu++) { + mp_kdb_trap = TRUE; + mp_kdb_ncpus = 1; + for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; - i386_signal_cpu(cpu, MP_KDB, SYNC); + kdb_ncpus++; + i386_signal_cpu(cpu, MP_KDB, ASYNC); } - mp_enable_preemption(); + DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus); + + tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000); + + while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. + * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + + cpu_pause(); + } + DBG("mp_kdp_enter() %d processors done %s\n", + mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out"); +} + +static void +mp_kdb_wait(void) +{ + DBG("mp_kdb_wait()\n"); + + /* If an I/O port has been specified as a debugging aid, issue a read */ + panic_io_port_read(); + + atomic_incl(&mp_kdb_ncpus, 1); + while (mp_kdb_trap) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. 
+ * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + + cpu_pause(); + } + atomic_decl((volatile long *)&mp_kdb_ncpus, 1); + DBG("mp_kdb_wait() done\n"); } /* @@ -1102,9 +1740,35 @@ clear_kdb_intr(void) mp_enable_preemption(); } +void +mp_kdb_exit(void) +{ + DBG("mp_kdb_exit()\n"); + atomic_decl((volatile long *)&mp_kdb_ncpus, 1); + mp_kdb_trap = FALSE; + __asm__ volatile("mfence"); + + while (mp_kdb_ncpus > 0) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. + * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + + cpu_pause(); + } + + DBG("mp_kdb_exit() done\n"); +} + +#endif /* MACH_KDB */ + /* * i386_init_slave() is called from pstart. * We're in the cpu's interrupt stack with interrupts disabled. + * At this point we are in legacy mode. We need to switch on IA32e + * if the mode is set to 64-bits. */ void i386_init_slave(void) @@ -1117,16 +1781,28 @@ i386_init_slave(void) DBG("i386_init_slave() CPU%d: phys (%d) active.\n", get_cpu_number(), get_cpu_phys_number()); - lapic_init(); + assert(!ml_get_interrupts_enabled()); + + cpu_mode_init(current_cpu_datap()); + + mca_cpu_init(); + lapic_init(); LAPIC_DUMP(); LAPIC_CPU_MAP_DUMP(); + init_fpu(); + mtrr_update_cpu(); + /* resume VT operation */ + vmx_resume(); + pat_init(); - cpu_init(); + cpu_thread_init(); /* not strictly necessary */ + + cpu_init(); /* Sets cpu_running which starter cpu waits for */ slave_main(); @@ -1137,19 +1813,13 @@ void slave_machine_init(void) { /* - * Here in process context. + * Here in process context, but with interrupts disabled. 
*/ DBG("slave_machine_init() CPU%d\n", get_cpu_number()); - init_fpu(); - - cpu_thread_init(); - - pmc_init(); - - cpu_machine_init(); - clock_init(); + + cpu_machine_init(); /* Interrupts enabled hereafter */ } #undef cpu_number() @@ -1204,44 +1874,5 @@ db_trap_hist(void) } #endif /* TRAP_DEBUG */ - -void db_lapic(int cpu); -unsigned int db_remote_read(int cpu, int reg); -void db_ioapic(unsigned int); -void kdb_console(void); - -void -kdb_console(void) -{ -} - -#define BOOLP(a) ((a)?' ':'!') - -static char *DM[8] = { - "Fixed", - "Lowest Priority", - "Invalid", - "Invalid", - "NMI", - "Reset", - "Invalid", - "ExtINT"}; - -unsigned int -db_remote_read(int cpu, int reg) -{ - return -1; -} - -void -db_lapic(int cpu) -{ -} - -void -db_ioapic(unsigned int ind) -{ -} - #endif /* MACH_KDB */