2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
35 #include <mach_ldebug.h>
38 #include <mach/mach_types.h>
39 #include <mach/kern_return.h>
41 #include <kern/kern_types.h>
42 #include <kern/startup.h>
43 #include <kern/processor.h>
44 #include <kern/cpu_number.h>
45 #include <kern/cpu_data.h>
46 #include <kern/assert.h>
47 #include <kern/machine.h>
49 #include <kern/misc_protos.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_kern.h>
54 #include <profiling/profile-mk.h>
57 #include <i386/mp_events.h>
58 #include <i386/mp_slave_boot.h>
59 #include <i386/lapic.h>
62 #include <i386/cpuid.h>
63 #include <i386/proc_reg.h>
64 #include <i386/machine_cpu.h>
65 #include <i386/misc_protos.h>
66 #include <i386/mtrr.h>
67 #include <i386/vmx/vmx_cpu.h>
68 #include <i386/postcode.h>
69 #include <i386/perfmon.h>
70 #include <i386/cpu_threads.h>
71 #include <i386/mp_desc.h>
72 #include <i386/trap.h>
73 #include <i386/machine_routines.h>
74 #include <i386/pmCPU.h>
75 #include <i386/machine_check.h>
77 #include <chud/chud_xnu.h>
78 #include <chud/chud_xnu_private.h>
80 #include <sys/kdebug.h>
82 #include <i386/db_machdep.h>
83 #include <ddb/db_aout.h>
84 #include <ddb/db_access.h>
85 #include <ddb/db_sym.h>
86 #include <ddb/db_variables.h>
87 #include <ddb/db_command.h>
88 #include <ddb/db_output.h>
89 #include <ddb/db_expr.h>
93 #define PAUSE delay(1000000)
94 #define DBG(x...) kprintf(x)
100 #define FULL_SLAVE_INIT (NULL)
101 #define FAST_SLAVE_INIT ((void *)(uintptr_t)1)
103 void slave_boot_init(void);
106 static void mp_kdb_wait(void);
107 volatile boolean_t mp_kdb_trap
= FALSE
;
108 volatile long mp_kdb_ncpus
= 0;
111 static void mp_kdp_wait(boolean_t flush
);
112 static void mp_rendezvous_action(void);
113 static void mp_broadcast_action(void);
115 static boolean_t
cpu_signal_pending(int cpu
, mp_event_t event
);
116 static int cpu_signal_handler(x86_saved_state_t
*regs
);
117 static int NMIInterruptHandler(x86_saved_state_t
*regs
);
119 boolean_t smp_initialized
= FALSE
;
120 volatile boolean_t force_immediate_debugger_NMI
= FALSE
;
121 volatile boolean_t pmap_tlb_flush_timeout
= FALSE
;
123 decl_simple_lock_data(,mp_kdp_lock
);
125 decl_mutex_data(static, mp_cpu_boot_lock
);
127 /* Variables needed for MP rendezvous. */
128 decl_simple_lock_data(,mp_rv_lock
);
129 static void (*mp_rv_setup_func
)(void *arg
);
130 static void (*mp_rv_action_func
)(void *arg
);
131 static void (*mp_rv_teardown_func
)(void *arg
);
132 static void *mp_rv_func_arg
;
133 static int mp_rv_ncpus
;
134 /* Cache-aligned barriers: */
135 static volatile long mp_rv_entry
__attribute__((aligned(64)));
136 static volatile long mp_rv_exit
__attribute__((aligned(64)));
137 static volatile long mp_rv_complete
__attribute__((aligned(64)));
139 /* Variables needed for MP broadcast. */
140 static void (*mp_bc_action_func
)(void *arg
);
141 static void *mp_bc_func_arg
;
142 static int mp_bc_ncpus
;
143 static volatile long mp_bc_count
;
144 decl_mutex_data(static, mp_bc_lock
);
145 static volatile int debugger_cpu
= -1;
147 static void mp_cpus_call_action(void);
151 * Initialize dummy structs for profiling. These aren't used but
152 * allows hertz_tick() to be built with GPROF defined.
154 struct profile_vars _profile_vars
;
155 struct profile_vars
*_profile_vars_cpus
[MAX_CPUS
] = { &_profile_vars
};
156 #define GPROF_INIT() \
160 /* Hack to initialize pointers to unused profiling structs */ \
161 for (i = 1; i < MAX_CPUS; i++) \
162 _profile_vars_cpus[i] = &_profile_vars; \
171 simple_lock_init(&mp_kdp_lock
, 0);
172 simple_lock_init(&mp_rv_lock
, 0);
173 mutex_init(&mp_cpu_boot_lock
, 0);
174 mutex_init(&mp_bc_lock
, 0);
183 lapic_set_intr_func(LAPIC_NMI_INTERRUPT
, NMIInterruptHandler
);
184 lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR
), cpu_signal_handler
);
189 DBGLOG_CPU_INIT(master_cpu
);
193 smp_initialized
= TRUE
;
199 * Poll a CPU to see when it has marked itself as running.
202 mp_wait_for_cpu_up(int slot_num
, unsigned int iters
, unsigned int usecdelay
)
204 while (iters
-- > 0) {
205 if (cpu_datap(slot_num
)->cpu_running
)
217 int lapic
= cpu_to_lapic
[slot_num
];
221 DBGLOG_CPU_INIT(slot_num
);
223 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num
, lapic
);
224 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD
, (int) IdlePTD
);
227 * Initialize (or re-initialize) the descriptor tables for this cpu.
228 * Propagate processor mode to slave.
230 if (cpu_mode_is64bit())
231 cpu_desc_init64(cpu_datap(slot_num
), FALSE
);
233 cpu_desc_init(cpu_datap(slot_num
), FALSE
);
235 /* Serialize use of the slave boot stack. */
236 mutex_lock(&mp_cpu_boot_lock
);
238 mp_disable_preemption();
239 if (slot_num
== get_cpu_number()) {
240 mp_enable_preemption();
241 mutex_unlock(&mp_cpu_boot_lock
);
245 LAPIC_WRITE(ICRD
, lapic
<< LAPIC_ICRD_DEST_SHIFT
);
246 LAPIC_WRITE(ICR
, LAPIC_ICR_DM_INIT
);
249 LAPIC_WRITE(ICRD
, lapic
<< LAPIC_ICRD_DEST_SHIFT
);
250 LAPIC_WRITE(ICR
, LAPIC_ICR_DM_STARTUP
|(MP_BOOT
>>12));
253 LAPIC_WRITE(ICRD
, lapic
<< LAPIC_ICRD_DEST_SHIFT
);
254 LAPIC_WRITE(ICR
, LAPIC_ICR_DM_STARTUP
|(MP_BOOT
>>12));
257 #ifdef POSTCODE_DELAY
258 /* Wait much longer if postcodes are displayed for a delay period. */
261 mp_wait_for_cpu_up(slot_num
, i
, 10000);
263 mp_enable_preemption();
264 mutex_unlock(&mp_cpu_boot_lock
);
266 if (!cpu_datap(slot_num
)->cpu_running
) {
267 kprintf("Failed to start CPU %02d\n", slot_num
);
268 printf("Failed to start CPU %02d, rebooting...\n", slot_num
);
273 kprintf("Started cpu %d (lapic id %08x)\n", slot_num
, lapic
);
279 * Quickly bring a CPU back online which has been halted.
282 intel_startCPU_fast(int slot_num
)
287 * Try to perform a fast restart
289 rc
= pmCPUExitHalt(slot_num
);
290 if (rc
!= KERN_SUCCESS
)
292 * The CPU was not eligible for a fast restart.
297 * Wait until the CPU is back online.
299 mp_disable_preemption();
302 * We use short pauses (1us) for low latency. 30,000 iterations is
303 * longer than a full restart would require so it should be more
306 mp_wait_for_cpu_up(slot_num
, 30000, 1);
307 mp_enable_preemption();
310 * Check to make sure that the CPU is really running. If not,
311 * go through the slow path.
313 if (cpu_datap(slot_num
)->cpu_running
)
314 return(KERN_SUCCESS
);
316 return(KERN_FAILURE
);
319 extern char slave_boot_base
[];
320 extern char slave_boot_end
[];
321 extern void slave_pstart(void);
324 slave_boot_init(void)
326 DBG("V(slave_boot_base)=%p P(slave_boot_base)=%p MP_BOOT=%p sz=0x%x\n",
328 kvtophys((vm_offset_t
) slave_boot_base
),
330 slave_boot_end
-slave_boot_base
);
333 * Copy the boot entry code to the real-mode vector area MP_BOOT.
334 * This is in page 1 which has been reserved for this purpose by
335 * machine_startup() from the boot processor.
336 * The slave boot code is responsible for switching to protected
337 * mode and then jumping to the common startup, _start().
339 bcopy_phys(kvtophys((vm_offset_t
) slave_boot_base
),
341 slave_boot_end
-slave_boot_base
);
344 * Zero a stack area above the boot code.
346 DBG("bzero_phys 0x%x sz 0x%x\n",MP_BOOTSTACK
+MP_BOOT
-0x400, 0x400);
347 bzero_phys((addr64_t
)MP_BOOTSTACK
+MP_BOOT
-0x400, 0x400);
350 * Set the location at the base of the stack to point to the
351 * common startup entry.
353 DBG("writing 0x%x at phys 0x%x\n",
354 kvtophys((vm_offset_t
) &slave_pstart
), MP_MACH_START
+MP_BOOT
);
355 ml_phys_write_word(MP_MACH_START
+MP_BOOT
,
356 (unsigned int)kvtophys((vm_offset_t
) &slave_pstart
));
363 cpu_signal_event_log_t
*cpu_signal
[MAX_CPUS
];
364 cpu_signal_event_log_t
*cpu_handle
[MAX_CPUS
];
366 MP_EVENT_NAME_DECL();
368 #endif /* MP_DEBUG */
371 cpu_signal_handler(x86_saved_state_t
*regs
)
374 volatile int *my_word
;
375 #if MACH_KDB && MACH_ASSERT
377 #endif /* MACH_KDB && MACH_ASSERT */
379 mp_disable_preemption();
381 my_cpu
= cpu_number();
382 my_word
= &current_cpu_datap()->cpu_signals
;
385 #if MACH_KDB && MACH_ASSERT
387 Debugger("cpu_signal_handler: signals did not clear");
388 #endif /* MACH_KDB && MACH_ASSERT */
390 if (i_bit(MP_KDP
, my_word
)) {
391 DBGLOG(cpu_handle
,my_cpu
,MP_KDP
);
392 i_bit_clear(MP_KDP
, my_word
);
393 /* Ensure that the i386_kernel_state at the base of the
394 * current thread's stack (if any) is synchronized with the
395 * context at the moment of the interrupt, to facilitate
396 * access through the debugger.
399 sync_iss_to_iks(saved_state32(regs
));
402 #endif /* MACH_KDP */
403 if (i_bit(MP_TLB_FLUSH
, my_word
)) {
404 DBGLOG(cpu_handle
,my_cpu
,MP_TLB_FLUSH
);
405 i_bit_clear(MP_TLB_FLUSH
, my_word
);
406 pmap_update_interrupt();
407 } else if (i_bit(MP_AST
, my_word
)) {
408 DBGLOG(cpu_handle
,my_cpu
,MP_AST
);
409 i_bit_clear(MP_AST
, my_word
);
410 ast_check(cpu_to_processor(my_cpu
));
412 } else if (i_bit(MP_KDB
, my_word
)) {
414 i_bit_clear(MP_KDB
, my_word
);
415 current_cpu_datap()->cpu_kdb_is_slave
++;
417 current_cpu_datap()->cpu_kdb_is_slave
--;
418 #endif /* MACH_KDB */
419 } else if (i_bit(MP_RENDEZVOUS
, my_word
)) {
420 DBGLOG(cpu_handle
,my_cpu
,MP_RENDEZVOUS
);
421 i_bit_clear(MP_RENDEZVOUS
, my_word
);
422 mp_rendezvous_action();
423 } else if (i_bit(MP_BROADCAST
, my_word
)) {
424 DBGLOG(cpu_handle
,my_cpu
,MP_BROADCAST
);
425 i_bit_clear(MP_BROADCAST
, my_word
);
426 mp_broadcast_action();
427 } else if (i_bit(MP_CHUD
, my_word
)) {
428 DBGLOG(cpu_handle
,my_cpu
,MP_CHUD
);
429 i_bit_clear(MP_CHUD
, my_word
);
430 chudxnu_cpu_signal_handler();
431 } else if (i_bit(MP_CALL
, my_word
)) {
432 DBGLOG(cpu_handle
,my_cpu
,MP_CALL
);
433 i_bit_clear(MP_CALL
, my_word
);
434 mp_cpus_call_action();
438 mp_enable_preemption();
444 NMIInterruptHandler(x86_saved_state_t
*regs
)
448 sync_iss_to_iks_unconditionally(regs
);
449 __asm__
volatile("movl %%ebp, %0" : "=m" (stackptr
));
451 if (cpu_number() == debugger_cpu
)
454 if (pmap_tlb_flush_timeout
== TRUE
&& current_cpu_datap()->cpu_tlb_invalid
) {
456 snprintf(&pstr
[0], sizeof(pstr
), "Panic(CPU %d): Unresponsive processor\n", cpu_number());
457 panic_i386_backtrace(stackptr
, 10, &pstr
[0], TRUE
, regs
);
458 panic_io_port_read();
461 pmSafeMode(&current_cpu_datap()->lcpu
, PM_SAFE_FL_SAFE
);
472 int max_lock_loops
= 1000000;
473 int trappedalready
= 0; /* (BRINGUP */
474 #endif /* MP_DEBUG */
477 i386_cpu_IPI(int cpu
)
482 if(cpu_datap(cpu
)->cpu_signals
& 6) { /* (BRINGUP) */
483 kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu
)->cpu_signals
, cpu
);
485 #endif /* MP_DEBUG */
489 if(!trappedalready
&& (cpu_datap(cpu
)->cpu_signals
& 6)) { /* (BRINGUP) */
490 if(kdb_cpu
!= cpu_number()) {
492 panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n",
493 cpu_datap(cpu
)->cpu_signals
, cpu
, kdb_cpu
);
496 #endif /* MP_DEBUG */
499 /* Wait for previous interrupt to be delivered... */
501 int pending_busy_count
= 0;
502 while (LAPIC_READ(ICR
) & LAPIC_ICR_DS_PENDING
) {
503 if (++pending_busy_count
> max_lock_loops
)
504 panic("i386_cpu_IPI() deadlock\n");
506 while (LAPIC_READ(ICR
) & LAPIC_ICR_DS_PENDING
) {
507 #endif /* MP_DEBUG */
511 state
= ml_set_interrupts_enabled(FALSE
);
512 LAPIC_WRITE(ICRD
, cpu_to_lapic
[cpu
] << LAPIC_ICRD_DEST_SHIFT
);
513 LAPIC_WRITE(ICR
, LAPIC_VECTOR(INTERPROCESSOR
) | LAPIC_ICR_DM_FIXED
);
514 (void) ml_set_interrupts_enabled(state
);
518 * cpu_interrupt is really just to be used by the scheduler to
519 * get a CPU's attention; it may not always issue an IPI. If an
520 * IPI is always needed then use i386_cpu_IPI.
523 cpu_interrupt(int cpu
)
526 && pmCPUExitIdle(cpu_datap(cpu
))) {
532 * Send a true NMI via the local APIC to the specified CPU.
535 cpu_NMI_interrupt(int cpu
)
539 if (smp_initialized
) {
540 state
= ml_set_interrupts_enabled(FALSE
);
541 /* Program the interrupt command register */
542 LAPIC_WRITE(ICRD
, cpu_to_lapic
[cpu
] << LAPIC_ICRD_DEST_SHIFT
);
543 /* The vector is ignored in this case--the target CPU will enter on the
546 LAPIC_WRITE(ICR
, LAPIC_VECTOR(INTERPROCESSOR
)|LAPIC_ICR_DM_NMI
);
547 (void) ml_set_interrupts_enabled(state
);
552 i386_signal_cpu(int cpu
, mp_event_t event
, mp_sync_t mode
)
554 volatile int *signals
= &cpu_datap(cpu
)->cpu_signals
;
555 uint64_t tsc_timeout
;
558 if (!cpu_datap(cpu
)->cpu_running
)
561 if (event
== MP_TLB_FLUSH
)
562 KERNEL_DEBUG(0xef800020 | DBG_FUNC_START
, cpu
, 0, 0, 0, 0);
564 DBGLOG(cpu_signal
, cpu
, event
);
566 i_bit_set(event
, signals
);
570 tsc_timeout
= rdtsc64() + (1000*1000*1000);
571 while (i_bit(event
, signals
) && rdtsc64() < tsc_timeout
) {
574 if (i_bit(event
, signals
)) {
575 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
580 if (event
== MP_TLB_FLUSH
)
581 KERNEL_DEBUG(0xef800020 | DBG_FUNC_END
, cpu
, 0, 0, 0, 0);
585 * Send event to all running cpus.
586 * Called with the topology locked.
589 i386_signal_cpus(mp_event_t event
, mp_sync_t mode
)
592 unsigned int my_cpu
= cpu_number();
594 assert(hw_lock_held(&x86_topo_lock
));
596 for (cpu
= 0; cpu
< real_ncpus
; cpu
++) {
597 if (cpu
== my_cpu
|| !cpu_datap(cpu
)->cpu_running
)
599 i386_signal_cpu(cpu
, event
, mode
);
604 * Return the number of running cpus.
605 * Called with the topology locked.
608 i386_active_cpus(void)
611 unsigned int ncpus
= 0;
613 assert(hw_lock_held(&x86_topo_lock
));
615 for (cpu
= 0; cpu
< real_ncpus
; cpu
++) {
616 if (cpu_datap(cpu
)->cpu_running
)
623 * All-CPU rendezvous:
624 * - CPUs are signalled,
625 * - all execute the setup function (if specified),
626 * - rendezvous (i.e. all cpus reach a barrier),
627 * - all execute the action function (if specified),
628 * - rendezvous again,
629 * - execute the teardown function (if specified), and then
632 * Note that the supplied external functions _must_ be reentrant and aware
633 * that they are running in parallel and in an unknown lock context.
637 mp_rendezvous_action(void)
639 boolean_t intrs_enabled
;
642 if (mp_rv_setup_func
!= NULL
)
643 mp_rv_setup_func(mp_rv_func_arg
);
645 intrs_enabled
= ml_get_interrupts_enabled();
647 /* spin on entry rendezvous */
648 atomic_incl(&mp_rv_entry
, 1);
649 while (mp_rv_entry
< mp_rv_ncpus
) {
650 /* poll for pesky tlb flushes if interrupts disabled */
652 handle_pending_TLB_flushes();
655 /* action function */
656 if (mp_rv_action_func
!= NULL
)
657 mp_rv_action_func(mp_rv_func_arg
);
658 /* spin on exit rendezvous */
659 atomic_incl(&mp_rv_exit
, 1);
660 while (mp_rv_exit
< mp_rv_ncpus
) {
662 handle_pending_TLB_flushes();
666 /* teardown function */
667 if (mp_rv_teardown_func
!= NULL
)
668 mp_rv_teardown_func(mp_rv_func_arg
);
670 /* Bump completion count */
671 atomic_incl(&mp_rv_complete
, 1);
675 mp_rendezvous(void (*setup_func
)(void *),
676 void (*action_func
)(void *),
677 void (*teardown_func
)(void *),
681 if (!smp_initialized
) {
682 if (setup_func
!= NULL
)
684 if (action_func
!= NULL
)
686 if (teardown_func
!= NULL
)
691 /* obtain rendezvous lock */
692 simple_lock(&mp_rv_lock
);
694 /* set static function pointers */
695 mp_rv_setup_func
= setup_func
;
696 mp_rv_action_func
= action_func
;
697 mp_rv_teardown_func
= teardown_func
;
698 mp_rv_func_arg
= arg
;
705 * signal other processors, which will call mp_rendezvous_action()
706 * with interrupts disabled
708 simple_lock(&x86_topo_lock
);
709 mp_rv_ncpus
= i386_active_cpus();
710 i386_signal_cpus(MP_RENDEZVOUS
, ASYNC
);
711 simple_unlock(&x86_topo_lock
);
713 /* call executor function on this cpu */
714 mp_rendezvous_action();
717 * Spin for everyone to complete.
718 * This is necessary to ensure that all processors have proceeded
719 * from the exit barrier before we release the rendezvous structure.
721 while (mp_rv_complete
< mp_rv_ncpus
) {
726 mp_rv_setup_func
= NULL
;
727 mp_rv_action_func
= NULL
;
728 mp_rv_teardown_func
= NULL
;
729 mp_rv_func_arg
= NULL
;
732 simple_unlock(&mp_rv_lock
);
736 mp_rendezvous_break_lock(void)
738 simple_lock_init(&mp_rv_lock
, 0);
742 setup_disable_intrs(__unused
void * param_not_used
)
744 /* disable interrupts before the first barrier */
745 boolean_t intr
= ml_set_interrupts_enabled(FALSE
);
747 current_cpu_datap()->cpu_iflag
= intr
;
748 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__
);
752 teardown_restore_intrs(__unused
void * param_not_used
)
754 /* restore interrupt flag following MTRR changes */
755 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag
);
756 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__
);
760 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
761 * This is exported for use by kexts.
764 mp_rendezvous_no_intrs(
765 void (*action_func
)(void *),
768 mp_rendezvous(setup_disable_intrs
,
770 teardown_restore_intrs
,
775 handle_pending_TLB_flushes(void)
777 volatile int *my_word
= ¤t_cpu_datap()->cpu_signals
;
779 if (i_bit(MP_TLB_FLUSH
, my_word
)) {
780 DBGLOG(cpu_handle
, cpu_number(), MP_TLB_FLUSH
);
781 i_bit_clear(MP_TLB_FLUSH
, my_word
);
782 pmap_update_interrupt();
787 * This is called from cpu_signal_handler() to process an MP_CALL signal.
790 mp_cpus_call_action(void)
792 if (mp_rv_action_func
!= NULL
)
793 mp_rv_action_func(mp_rv_func_arg
);
794 atomic_incl(&mp_rv_complete
, 1);
798 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
799 * If the mode is SYNC, the function is called serially on the target cpus
800 * in logical cpu order. If the mode is ASYNC, the function is called in
801 * parallel over the specified cpus.
802 * The action function may be NULL.
803 * The cpu mask may include the local cpu. Offline cpus are ignored.
804 * Return does not occur until the function has completed on all cpus.
805 * The return value is the number of cpus on which the function was called.
811 void (*action_func
)(void *),
815 boolean_t intrs_enabled
= ml_get_interrupts_enabled();
816 boolean_t call_self
= FALSE
;
818 if (!smp_initialized
) {
819 if ((cpus
& CPUMASK_SELF
) == 0)
821 if (action_func
!= NULL
) {
822 (void) ml_set_interrupts_enabled(FALSE
);
824 ml_set_interrupts_enabled(intrs_enabled
);
829 /* obtain rendezvous lock */
830 simple_lock(&mp_rv_lock
);
832 /* Use the rendezvous data structures for this call */
833 mp_rv_action_func
= action_func
;
834 mp_rv_func_arg
= arg
;
838 simple_lock(&x86_topo_lock
);
839 for (cpu
= 0; cpu
< (cpu_t
) real_ncpus
; cpu
++) {
840 if (((cpu_to_cpumask(cpu
) & cpus
) == 0) ||
841 !cpu_datap(cpu
)->cpu_running
)
843 if (cpu
== (cpu_t
) cpu_number()) {
845 * We don't IPI ourself and if calling asynchronously,
846 * we defer our call until we have signalled all others.
849 if (mode
== SYNC
&& action_func
!= NULL
) {
850 (void) ml_set_interrupts_enabled(FALSE
);
852 ml_set_interrupts_enabled(intrs_enabled
);
856 * Bump count of other cpus called and signal this cpu.
857 * Note: we signal asynchronously regardless of mode
858 * because we wait on mp_rv_complete either here
859 * (if mode == SYNC) or later (if mode == ASYNC).
860 * While spinning, poll for TLB flushes if interrupts
864 i386_signal_cpu(cpu
, MP_CALL
, ASYNC
);
866 simple_unlock(&x86_topo_lock
);
867 while (mp_rv_complete
< mp_rv_ncpus
) {
869 handle_pending_TLB_flushes();
872 simple_lock(&x86_topo_lock
);
876 simple_unlock(&x86_topo_lock
);
879 * If calls are being made asynchronously,
880 * make the local call now if needed, and then
881 * wait for all other cpus to finish their calls.
884 if (call_self
&& action_func
!= NULL
) {
885 (void) ml_set_interrupts_enabled(FALSE
);
887 ml_set_interrupts_enabled(intrs_enabled
);
889 while (mp_rv_complete
< mp_rv_ncpus
) {
891 handle_pending_TLB_flushes();
896 /* Determine the number of cpus called */
897 cpu
= mp_rv_ncpus
+ (call_self
? 1 : 0);
899 simple_unlock(&mp_rv_lock
);
905 mp_broadcast_action(void)
907 /* call action function */
908 if (mp_bc_action_func
!= NULL
)
909 mp_bc_action_func(mp_bc_func_arg
);
911 /* if we're the last one through, wake up the instigator */
912 if (atomic_decl_and_test((volatile long *)&mp_bc_count
, 1))
913 thread_wakeup(((event_t
)(unsigned int *) &mp_bc_count
));
917 * mp_broadcast() runs a given function on all active cpus.
918 * The caller blocks until the function has run on all cpus.
919 * The caller will also block if there is another pending broadcast.
923 void (*action_func
)(void *),
926 if (!smp_initialized
) {
927 if (action_func
!= NULL
)
932 /* obtain broadcast lock */
933 mutex_lock(&mp_bc_lock
);
935 /* set static function pointers */
936 mp_bc_action_func
= action_func
;
937 mp_bc_func_arg
= arg
;
939 assert_wait(&mp_bc_count
, THREAD_UNINT
);
942 * signal other processors, which will call mp_broadcast_action()
944 simple_lock(&x86_topo_lock
);
945 mp_bc_ncpus
= i386_active_cpus(); /* total including this cpu */
946 mp_bc_count
= mp_bc_ncpus
;
947 i386_signal_cpus(MP_BROADCAST
, ASYNC
);
949 /* call executor function on this cpu */
950 mp_broadcast_action();
951 simple_unlock(&x86_topo_lock
);
953 /* block for all cpus to have run action_func */
955 thread_block(THREAD_CONTINUE_NULL
);
957 clear_wait(current_thread(), THREAD_AWAKENED
);
960 mutex_unlock(&mp_bc_lock
);
964 i386_activate_cpu(void)
966 cpu_data_t
*cdp
= current_cpu_datap();
968 assert(!ml_get_interrupts_enabled());
970 if (!smp_initialized
) {
971 cdp
->cpu_running
= TRUE
;
975 simple_lock(&x86_topo_lock
);
976 cdp
->cpu_running
= TRUE
;
977 simple_unlock(&x86_topo_lock
);
981 i386_deactivate_cpu(void)
983 cpu_data_t
*cdp
= current_cpu_datap();
985 assert(!ml_get_interrupts_enabled());
987 simple_lock(&x86_topo_lock
);
988 cdp
->cpu_running
= FALSE
;
989 simple_unlock(&x86_topo_lock
);
992 * In case a rendezvous/braodcast/call was initiated to this cpu
993 * before we cleared cpu_running, we must perform any actions due.
995 if (i_bit(MP_RENDEZVOUS
, &cdp
->cpu_signals
))
996 mp_rendezvous_action();
997 if (i_bit(MP_BROADCAST
, &cdp
->cpu_signals
))
998 mp_broadcast_action();
999 if (i_bit(MP_CALL
, &cdp
->cpu_signals
))
1000 mp_cpus_call_action();
1001 cdp
->cpu_signals
= 0; /* all clear */
1004 int pmsafe_debug
= 1;
1007 volatile boolean_t mp_kdp_trap
= FALSE
;
1008 volatile unsigned long mp_kdp_ncpus
;
1009 boolean_t mp_kdp_state
;
1017 unsigned int my_cpu
;
1018 uint64_t tsc_timeout
;
1020 DBG("mp_kdp_enter()\n");
1023 * Here to enter the debugger.
1024 * In case of races, only one cpu is allowed to enter kdp after
1027 mp_kdp_state
= ml_set_interrupts_enabled(FALSE
);
1028 simple_lock(&mp_kdp_lock
);
1031 pmSafeMode(&current_cpu_datap()->lcpu
, PM_SAFE_FL_SAFE
);
1033 while (mp_kdp_trap
) {
1034 simple_unlock(&mp_kdp_lock
);
1035 DBG("mp_kdp_enter() race lost\n");
1037 simple_lock(&mp_kdp_lock
);
1039 my_cpu
= cpu_number();
1040 debugger_cpu
= my_cpu
;
1041 mp_kdp_ncpus
= 1; /* self */
1043 simple_unlock(&mp_kdp_lock
);
1046 * Deliver a nudge to other cpus, counting how many
1048 DBG("mp_kdp_enter() signaling other processors\n");
1049 if (force_immediate_debugger_NMI
== FALSE
) {
1050 for (ncpus
= 1, cpu
= 0; cpu
< real_ncpus
; cpu
++) {
1051 if (cpu
== my_cpu
|| !cpu_datap(cpu
)->cpu_running
)
1054 i386_signal_cpu(cpu
, MP_KDP
, ASYNC
);
1057 * Wait for other processors to synchronize
1059 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus
);
1062 * This timeout is rather arbitrary; we don't want to NMI
1063 * processors that are executing at potentially
1064 * "unsafe-to-interrupt" points such as the trampolines,
1065 * but neither do we want to lose state by waiting too long.
1067 tsc_timeout
= rdtsc64() + (ncpus
* 1000 * 1000);
1069 while (mp_kdp_ncpus
!= ncpus
&& rdtsc64() < tsc_timeout
) {
1071 * A TLB shootdown request may be pending--this would
1072 * result in the requesting processor waiting in
1073 * PMAP_UPDATE_TLBS() until this processor deals with it.
1074 * Process it, so it can now enter mp_kdp_wait()
1076 handle_pending_TLB_flushes();
1079 /* If we've timed out, and some processor(s) are still unresponsive,
1080 * interrupt them with an NMI via the local APIC.
1082 if (mp_kdp_ncpus
!= ncpus
) {
1083 for (cpu
= 0; cpu
< real_ncpus
; cpu
++) {
1084 if (cpu
== my_cpu
|| !cpu_datap(cpu
)->cpu_running
)
1086 if (cpu_signal_pending(cpu
, MP_KDP
))
1087 cpu_NMI_interrupt(cpu
);
1092 for (cpu
= 0; cpu
< real_ncpus
; cpu
++) {
1093 if (cpu
== my_cpu
|| !cpu_datap(cpu
)->cpu_running
)
1095 cpu_NMI_interrupt(cpu
);
1098 DBG("mp_kdp_enter() %u processors done %s\n",
1099 mp_kdp_ncpus
, (mp_kdp_ncpus
== ncpus
) ? "OK" : "timed out");
1101 postcode(MP_KDP_ENTER
);
1105 cpu_signal_pending(int cpu
, mp_event_t event
)
1107 volatile int *signals
= &cpu_datap(cpu
)->cpu_signals
;
1108 boolean_t retval
= FALSE
;
1110 if (i_bit(event
, signals
))
1117 mp_kdp_wait(boolean_t flush
)
1119 DBG("mp_kdp_wait()\n");
1120 /* If an I/O port has been specified as a debugging aid, issue a read */
1121 panic_io_port_read();
1123 /* If we've trapped due to a machine-check, save MCA registers */
1127 pmSafeMode(&current_cpu_datap()->lcpu
, PM_SAFE_FL_SAFE
);
1129 atomic_incl((volatile long *)&mp_kdp_ncpus
, 1);
1130 while (mp_kdp_trap
) {
1132 * A TLB shootdown request may be pending--this would result
1133 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1134 * until this processor handles it.
1135 * Process it, so it can now enter mp_kdp_wait()
1138 handle_pending_TLB_flushes();
1143 pmSafeMode(&current_cpu_datap()->lcpu
, PM_SAFE_FL_NORMAL
);
1145 atomic_decl((volatile long *)&mp_kdp_ncpus
, 1);
1146 DBG("mp_kdp_wait() done\n");
1152 DBG("mp_kdp_exit()\n");
1154 atomic_decl((volatile long *)&mp_kdp_ncpus
, 1);
1155 mp_kdp_trap
= FALSE
;
1156 __asm__
volatile("mfence");
1158 /* Wait for other processors to stop spinning. XXX needs timeout */
1159 DBG("mp_kdp_exit() waiting for processors to resume\n");
1160 while (mp_kdp_ncpus
> 0) {
1162 * a TLB shootdown request may be pending... this would result in the requesting
1163 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1164 * Process it, so it can now enter mp_kdp_wait()
1166 handle_pending_TLB_flushes();
1172 pmSafeMode(&current_cpu_datap()->lcpu
, PM_SAFE_FL_NORMAL
);
1174 DBG("mp_kdp_exit() done\n");
1175 (void) ml_set_interrupts_enabled(mp_kdp_state
);
1178 #endif /* MACH_KDP */
1183 __unused processor_t processor
)
1189 processor_t processor
)
1191 int cpu
= PROCESSOR_DATA(processor
, slot_num
);
1193 if (cpu
!= cpu_number()) {
1194 i386_signal_cpu(cpu
, MP_AST
, ASYNC
);
1200 * invoke kdb on slave processors
1206 unsigned int my_cpu
= cpu_number();
1209 uint64_t tsc_timeout
= 0;
1213 for (kdb_ncpus
= 1, cpu
= 0; cpu
< real_ncpus
; cpu
++) {
1214 if (cpu
== my_cpu
|| !cpu_datap(cpu
)->cpu_running
)
1217 i386_signal_cpu(cpu
, MP_KDB
, ASYNC
);
1219 DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus
);
1221 tsc_timeout
= rdtsc64() + (kdb_ncpus
* 100 * 1000 * 1000);
1223 while (mp_kdb_ncpus
!= kdb_ncpus
&& rdtsc64() < tsc_timeout
) {
1225 * a TLB shootdown request may be pending... this would result in the requesting
1226 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1227 * Process it, so it can now enter mp_kdb_wait()
1229 handle_pending_TLB_flushes();
1233 DBG("mp_kdp_enter() %d processors done %s\n",
1234 mp_kdb_ncpus
, (mp_kdb_ncpus
== kdb_ncpus
) ? "OK" : "timed out");
1240 DBG("mp_kdb_wait()\n");
1242 /* If an I/O port has been specified as a debugging aid, issue a read */
1243 panic_io_port_read();
1245 atomic_incl(&mp_kdb_ncpus
, 1);
1246 while (mp_kdb_trap
) {
1248 * a TLB shootdown request may be pending... this would result in the requesting
1249 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1250 * Process it, so it can now enter mp_kdb_wait()
1252 handle_pending_TLB_flushes();
1256 atomic_decl((volatile long *)&mp_kdb_ncpus
, 1);
1257 DBG("mp_kdb_wait() done\n");
1261 * Clear kdb interrupt
1265 clear_kdb_intr(void)
1267 mp_disable_preemption();
1268 i_bit_clear(MP_KDB
, ¤t_cpu_datap()->cpu_signals
);
1269 mp_enable_preemption();
1275 DBG("mp_kdb_exit()\n");
1276 atomic_decl((volatile long *)&mp_kdb_ncpus
, 1);
1277 mp_kdb_trap
= FALSE
;
1278 __asm__
volatile("mfence");
1280 while (mp_kdb_ncpus
> 0) {
1282 * a TLB shootdown request may be pending... this would result in the requesting
1283 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1284 * Process it, so it can now enter mp_kdb_wait()
1286 handle_pending_TLB_flushes();
1291 DBG("mp_kdb_exit() done\n");
1294 #endif /* MACH_KDB */
1297 do_init_slave(boolean_t fast_restart
)
1299 void *init_param
= FULL_SLAVE_INIT
;
1301 postcode(I386_INIT_SLAVE
);
1303 if (!fast_restart
) {
1304 /* Ensure that caching and write-through are enabled */
1305 set_cr0(get_cr0() & ~(CR0_NW
|CR0_CD
));
1307 DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
1308 get_cpu_number(), get_cpu_phys_number());
1310 assert(!ml_get_interrupts_enabled());
1312 cpu_mode_init(current_cpu_datap());
1318 LAPIC_CPU_MAP_DUMP();
1324 init_param
= FAST_SLAVE_INIT
;
1326 /* resume VT operation */
1332 cpu_thread_init(); /* not strictly necessary */
1334 cpu_init(); /* Sets cpu_running which starter cpu waits for */
1336 slave_main(init_param
);
1338 panic("do_init_slave() returned from slave_main()");
1342 * i386_init_slave() is called from pstart.
1343 * We're in the cpu's interrupt stack with interrupts disabled.
1344 * At this point we are in legacy mode. We need to switch on IA32e
1345 * if the mode is set to 64-bits.
1348 i386_init_slave(void)
1350 do_init_slave(FALSE
);
1354 * i386_init_slave_fast() is called from pmCPUHalt.
1355 * We're running on the idle thread and need to fix up
1356 * some accounting and get it so that the scheduler sees this
1360 i386_init_slave_fast(void)
1362 do_init_slave(TRUE
);
1366 slave_machine_init(void *param
)
1369 * Here in process context, but with interrupts disabled.
1371 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1373 if (param
== FULL_SLAVE_INIT
) {
1379 cpu_machine_init(); /* Interrupts enabled hereafter */
/*
 * Function form of the cpu-number query: returns whatever
 * get_cpu_number() reports for the calling cpu.
 */
int
cpu_number(void)
{
	int	this_cpu = get_cpu_number();

	return this_cpu;
}
1390 #include <ddb/db_output.h>
1392 #define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */
1397 struct mp_trap_hist_struct
{
1399 unsigned char data
[5];
1400 } trap_hist
[MTRAPS
], *cur_trap_hist
= trap_hist
,
1401 *max_trap_hist
= &trap_hist
[MTRAPS
];
1403 void db_trap_hist(void);
1423 for(i
=0;i
<MTRAPS
;i
++)
1424 if (trap_hist
[i
].type
== 1 || trap_hist
[i
].type
== 2) {
1426 (&trap_hist
[i
]>=cur_trap_hist
)?"*":" ",
1427 (trap_hist
[i
].type
== 1)?"SPL":"INT");
1429 db_printf(" %02x", trap_hist
[i
].data
[j
]);
1434 #endif /* TRAP_DEBUG */
1435 #endif /* MACH_KDB */