/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_ldebug.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <profiling/profile-mk.h>

#include <i386/mp_events.h>
#include <i386/mp_slave_boot.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/proc_reg.h>
#include <i386/machine_cpu.h>
#include <i386/misc_protos.h>
#include <i386/mtrr.h>
#include <i386/vmx/vmx_cpu.h>
#include <i386/postcode.h>
#include <i386/perfmon.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/trap.h>
#include <i386/machine_routines.h>
#include <i386/pmCPU.h>
#include <i386/machine_check.h>

#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>

#include <sys/kdebug.h>
#if	MACH_KDB
#include <i386/db_machdep.h>
#include <ddb/db_aout.h>
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
#include <ddb/db_variables.h>
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_expr.h>
#endif	/* MACH_KDB */
#define PAUSE		delay(1000000)
#define DBG(x...)	kprintf(x)

#define FULL_SLAVE_INIT	(NULL)
#define FAST_SLAVE_INIT	((void *)(uintptr_t)1)
void		slave_boot_init(void);

#if	MACH_KDB
static void	mp_kdb_wait(void);
volatile boolean_t	mp_kdb_trap = FALSE;
volatile long	mp_kdb_ncpus = 0;
#endif	/* MACH_KDB */

static void	mp_kdp_wait(boolean_t flush);
static void	mp_rendezvous_action(void);
static void	mp_broadcast_action(void);

static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
static int		cpu_signal_handler(x86_saved_state_t *regs);
static int		NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t		smp_initialized = FALSE;
volatile boolean_t	force_immediate_debugger_NMI = FALSE;
volatile boolean_t	pmap_tlb_flush_timeout = FALSE;

decl_simple_lock_data(,mp_kdp_lock);

decl_mutex_data(static, mp_cpu_boot_lock);
/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void	(*mp_rv_setup_func)(void *arg);
static void	(*mp_rv_action_func)(void *arg);
static void	(*mp_rv_teardown_func)(void *arg);
static void	*mp_rv_func_arg;
static int	mp_rv_ncpus;
		/* Cache-aligned barriers: */
static volatile long	mp_rv_entry    __attribute__((aligned(64)));
static volatile long	mp_rv_exit     __attribute__((aligned(64)));
static volatile long	mp_rv_complete __attribute__((aligned(64)));

/* Variables needed for MP broadcast. */
static void	(*mp_bc_action_func)(void *arg);
static void	*mp_bc_func_arg;
static int	mp_bc_ncpus;
static volatile long	mp_bc_count;
decl_mutex_data(static, mp_bc_lock);

static volatile int	debugger_cpu = -1;

static void	mp_cpus_call_action(void);
static void	mp_call_PM(void);
#if GPROF
/*
 * Initialize dummy structs for profiling. These aren't used but
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT()							\
{									\
	int	i;							\
									\
	/* Hack to initialize pointers to unused profiling structs */	\
	for (i = 1; i < MAX_CPUS; i++)					\
		_profile_vars_cpus[i] = &_profile_vars;			\
}
#else
#define GPROF_INIT()
#endif /* GPROF */
	simple_lock_init(&mp_kdp_lock, 0);
	simple_lock_init(&mp_rv_lock, 0);
	mutex_init(&mp_cpu_boot_lock, 0);
	mutex_init(&mp_bc_lock, 0);

	lapic_set_intr_func(LAPIC_NMI_INTERRUPT, NMIInterruptHandler);
	lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler);

	DBGLOG_CPU_INIT(master_cpu);

	smp_initialized = TRUE;
/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
	while (iters-- > 0) {
		if (cpu_datap(slot_num)->cpu_running)
			break;
		delay(usecdelay);
	}
}

typedef struct {
	int		starter_cpu;
	int		target_cpu;
	int		target_lapic;
	boolean_t	is_nehalem;
} processor_start_info_t;

static processor_start_info_t	start_info;
static void
start_cpu(void *arg)
{
	processor_start_info_t	*psip = (processor_start_info_t *) arg;

	/* Ignore this if the current processor is not the starter */
	if (cpu_number() != psip->starter_cpu)
		return;

	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
	delay(psip->is_nehalem ? 100 : 10000);

	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));

	if (!psip->is_nehalem) {

		LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
		LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
	}

#ifdef	POSTCODE_DELAY
	/* Wait much longer if postcodes are displayed for a delay period. */
#endif
	mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
}
kern_return_t
intel_startCPU(
	int	slot_num)
{
	int		lapic = cpu_to_lapic[slot_num];
	boolean_t	istate;

	DBGLOG_CPU_INIT(slot_num);

	DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD);

	/*
	 * Initialize (or re-initialize) the descriptor tables for this cpu.
	 * Propagate processor mode to slave.
	 */
	if (cpu_mode_is64bit())
		cpu_desc_init64(cpu_datap(slot_num), FALSE);
	else
		cpu_desc_init(cpu_datap(slot_num), FALSE);

	/* Serialize use of the slave boot stack, etc. */
	mutex_lock(&mp_cpu_boot_lock);

	istate = ml_set_interrupts_enabled(FALSE);
	if (slot_num == get_cpu_number()) {
		ml_set_interrupts_enabled(istate);
		mutex_unlock(&mp_cpu_boot_lock);
		return KERN_SUCCESS;
	}

	start_info.starter_cpu  = cpu_number();
	start_info.is_nehalem   = (cpuid_info()->cpuid_model
						== CPUID_MODEL_NEHALEM);
	start_info.target_cpu   = slot_num;
	start_info.target_lapic = lapic;

	/*
	 * For Nehalem, perform the processor startup with all running
	 * processors rendezvous'ed. This is required during periods when
	 * the cache-disable bit is set for MTRR/PAT initialization.
	 */
	if (start_info.is_nehalem)
		mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
	else
		start_cpu((void *) &start_info);

	ml_set_interrupts_enabled(istate);
	mutex_unlock(&mp_cpu_boot_lock);

	if (!cpu_datap(slot_num)->cpu_running) {
		kprintf("Failed to start CPU %02d\n", slot_num);
		printf("Failed to start CPU %02d, rebooting...\n", slot_num);

	} else {
		kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
		return KERN_SUCCESS;
	}
}
/*
 * Quickly bring a CPU back online which has been halted.
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
	kern_return_t	rc;

	/*
	 * Try to perform a fast restart.
	 */
	rc = pmCPUExitHalt(slot_num);
	if (rc != KERN_SUCCESS)
		/*
		 * The CPU was not eligible for a fast restart.
		 */
		return(rc);

	/*
	 * Wait until the CPU is back online.
	 */
	mp_disable_preemption();

	/*
	 * We use short pauses (1us) for low latency. 30,000 iterations is
	 * longer than a full restart would require so it should be more
	 * than sufficient.
	 */
	mp_wait_for_cpu_up(slot_num, 30000, 1);
	mp_enable_preemption();

	/*
	 * Check to make sure that the CPU is really running. If not,
	 * go through the slow path.
	 */
	if (cpu_datap(slot_num)->cpu_running)
		return(KERN_SUCCESS);
	else
		return(KERN_FAILURE);
}
extern char	slave_boot_base[];
extern char	slave_boot_end[];
extern void	slave_pstart(void);

void
slave_boot_init(void)
{
	DBG("V(slave_boot_base)=%p P(slave_boot_base)=%p MP_BOOT=%p sz=0x%x\n",
		slave_boot_base,
		kvtophys((vm_offset_t) slave_boot_base),
		MP_BOOT,
		slave_boot_end-slave_boot_base);

	/*
	 * Copy the boot entry code to the real-mode vector area MP_BOOT.
	 * This is in page 1 which has been reserved for this purpose by
	 * machine_startup() from the boot processor.
	 * The slave boot code is responsible for switching to protected
	 * mode and then jumping to the common startup, _start().
	 */
	bcopy_phys(kvtophys((vm_offset_t) slave_boot_base),
		   (addr64_t) MP_BOOT,
		   slave_boot_end-slave_boot_base);

	/*
	 * Zero a stack area above the boot code.
	 */
	DBG("bzero_phys 0x%x sz 0x%x\n",MP_BOOTSTACK+MP_BOOT-0x400, 0x400);
	bzero_phys((addr64_t)MP_BOOTSTACK+MP_BOOT-0x400, 0x400);

	/*
	 * Set the location at the base of the stack to point to the
	 * common startup entry.
	 */
	DBG("writing 0x%x at phys 0x%x\n",
		kvtophys((vm_offset_t) &slave_pstart), MP_MACH_START+MP_BOOT);
	ml_phys_write_word(MP_MACH_START+MP_BOOT,
			(unsigned int)kvtophys((vm_offset_t) &slave_pstart));
}
#if	MP_DEBUG
cpu_signal_event_log_t	*cpu_signal[MAX_CPUS];
cpu_signal_event_log_t	*cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif	/* MP_DEBUG */
static int
cpu_signal_handler(x86_saved_state_t *regs)
{
	int		my_cpu;
	volatile int	*my_word;
#if	MACH_KDB && MACH_ASSERT

#endif	/* MACH_KDB && MACH_ASSERT */

	mp_disable_preemption();

	my_cpu = cpu_number();
	my_word = &current_cpu_datap()->cpu_signals;

	do {
#if	MACH_KDB && MACH_ASSERT

			Debugger("cpu_signal_handler: signals did not clear");
#endif	/* MACH_KDB && MACH_ASSERT */
#if	MACH_KDP
		if (i_bit(MP_KDP, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_KDP);
			i_bit_clear(MP_KDP, my_word);
/* Ensure that the i386_kernel_state at the base of the
 * current thread's stack (if any) is synchronized with the
 * context at the moment of the interrupt, to facilitate
 * access through the debugger.
 */
			sync_iss_to_iks(saved_state32(regs));

		} else
#endif	/* MACH_KDP */
		if (i_bit(MP_TLB_FLUSH, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
			i_bit_clear(MP_TLB_FLUSH, my_word);
			pmap_update_interrupt();
		} else if (i_bit(MP_AST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_AST);
			i_bit_clear(MP_AST, my_word);
			ast_check(cpu_to_processor(my_cpu));
#if	MACH_KDB
		} else if (i_bit(MP_KDB, my_word)) {
			i_bit_clear(MP_KDB, my_word);
			current_cpu_datap()->cpu_kdb_is_slave++;
			mp_kdb_wait();
			current_cpu_datap()->cpu_kdb_is_slave--;
#endif	/* MACH_KDB */
		} else if (i_bit(MP_RENDEZVOUS, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
			i_bit_clear(MP_RENDEZVOUS, my_word);
			mp_rendezvous_action();
		} else if (i_bit(MP_BROADCAST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
			i_bit_clear(MP_BROADCAST, my_word);
			mp_broadcast_action();
		} else if (i_bit(MP_CHUD, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CHUD);
			i_bit_clear(MP_CHUD, my_word);
			chudxnu_cpu_signal_handler();
		} else if (i_bit(MP_CALL, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL);
			i_bit_clear(MP_CALL, my_word);
			mp_cpus_call_action();
		} else if (i_bit(MP_CALL_PM, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
			i_bit_clear(MP_CALL_PM, my_word);
			mp_call_PM();
		}
	} while (*my_word);

	mp_enable_preemption();

	return 0;
}
static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
	void	*stackptr;

	sync_iss_to_iks_unconditionally(regs);
	__asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));

	if (cpu_number() == debugger_cpu)

	if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) {

		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number());
		panic_i386_backtrace(stackptr, 10, &pstr[0], TRUE, regs);
		panic_io_port_read();
	}

	pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
#if	MP_DEBUG
int	max_lock_loops = 1000000;
int	trappedalready = 0;	/* (BRINGUP) */
#endif	/* MP_DEBUG */

void
i386_cpu_IPI(int cpu)
{
	boolean_t	state;

#if	MP_DEBUG
	if(cpu_datap(cpu)->cpu_signals & 6) {	/* (BRINGUP) */
		kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu);
	}
#endif	/* MP_DEBUG */

#if	MP_DEBUG
	if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) {	/* (BRINGUP) */
		if(kdb_cpu != cpu_number()) {
			panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n",
				cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu);
		}
	}
#endif	/* MP_DEBUG */

	/* Wait for previous interrupt to be delivered... */
#if	MP_DEBUG
	int	pending_busy_count = 0;
	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
		if (++pending_busy_count > max_lock_loops)
			panic("i386_cpu_IPI() deadlock\n");
#else
	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
#endif	/* MP_DEBUG */

	}

	state = ml_set_interrupts_enabled(FALSE);
	LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED);
	(void) ml_set_interrupts_enabled(state);
}
/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI. If an
 * IPI is always needed then use i386_cpu_IPI.
 */
void
cpu_interrupt(int cpu)
{
	if (smp_initialized
	    && pmCPUExitIdle(cpu_datap(cpu))) {
		i386_cpu_IPI(cpu);
	}
}
/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
void
cpu_NMI_interrupt(int cpu)
{
	boolean_t	state;

	if (smp_initialized) {
		state = ml_set_interrupts_enabled(FALSE);
		/* Program the interrupt command register */
		LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
		/*
		 * The vector is ignored in this case--the target CPU will
		 * enter on the NMI vector.
		 */
		LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
		(void) ml_set_interrupts_enabled(state);
	}
}
static volatile void	(*mp_PM_func)(void) = NULL;

static void
mp_call_PM(void)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL)
		(*mp_PM_func)();
}

void
cpu_PM_interrupt(int cpu)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL) {
		if (cpu == cpu_number())
			(*mp_PM_func)();
		else
			i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
	}
}

void
PM_interrupt_register(void (*fn)(void))
{
	mp_PM_func = fn;
}
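/*
 * Illustrative sketch (disabled, not part of the build): one way a
 * power-management component might use PM_interrupt_register() above.
 * The handler and init function names here are hypothetical.
 */
#if 0
static void
example_pm_handler(void)
{
	/* Called via mp_call_PM()/cpu_PM_interrupt(), with interrupts disabled. */
	kprintf("PM interrupt on cpu %d\n", cpu_number());
}

static void
example_pm_init(void)
{
	PM_interrupt_register(example_pm_handler);
}
#endif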
void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	uint64_t	tsc_timeout;

	if (!cpu_datap(cpu)->cpu_running)
		return;

	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0);

	DBGLOG(cpu_signal, cpu, event);

	i_bit_set(event, signals);
	i386_cpu_IPI(cpu);
	if (mode == SYNC) {
		tsc_timeout = rdtsc64() + (1000*1000*1000);
		while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {

		}
		if (i_bit(event, signals)) {
			DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
				cpu, event);

		}
	}
	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0);
}
/*
 * Send event to all running cpus.
 * Called with the topology locked.
 */
void
i386_signal_cpus(mp_event_t event, mp_sync_t mode)
{
	unsigned int	cpu;
	unsigned int	my_cpu = cpu_number();

	assert(hw_lock_held(&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		i386_signal_cpu(cpu, event, mode);
	}
}
/*
 * Return the number of running cpus.
 * Called with the topology locked.
 */
int
i386_active_cpus(void)
{
	unsigned int	cpu;
	unsigned int	ncpus = 0;

	assert(hw_lock_held(&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu_datap(cpu)->cpu_running)
			ncpus++;
	}

	return(ncpus);
}
/*
 * All-CPU rendezvous:
 *	- CPUs are signalled,
 *	- all execute the setup function (if specified),
 *	- rendezvous (i.e. all cpus reach a barrier),
 *	- all execute the action function (if specified),
 *	- rendezvous again,
 *	- execute the teardown function (if specified), and then
 *	- resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */

static void
mp_rendezvous_action(void)
{
	boolean_t intrs_enabled;

	/* setup function */
	if (mp_rv_setup_func != NULL)
		mp_rv_setup_func(mp_rv_func_arg);

	intrs_enabled = ml_get_interrupts_enabled();

	/* spin on entry rendezvous */
	atomic_incl(&mp_rv_entry, 1);
	while (mp_rv_entry < mp_rv_ncpus) {
		/* poll for pesky tlb flushes if interrupts disabled */
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
	}

	/* action function */
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);

	/* spin on exit rendezvous */
	atomic_incl(&mp_rv_exit, 1);
	while (mp_rv_exit < mp_rv_ncpus) {
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
	}

	/* teardown function */
	if (mp_rv_teardown_func != NULL)
		mp_rv_teardown_func(mp_rv_func_arg);

	/* Bump completion count */
	atomic_incl(&mp_rv_complete, 1);
}
void
mp_rendezvous(void (*setup_func)(void *),
	      void (*action_func)(void *),
	      void (*teardown_func)(void *),
	      void *arg)
{
	if (!smp_initialized) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* set static function pointers */
	mp_rv_setup_func = setup_func;
	mp_rv_action_func = action_func;
	mp_rv_teardown_func = teardown_func;
	mp_rv_func_arg = arg;

	/*
	 * signal other processors, which will call mp_rendezvous_action()
	 * with interrupts disabled
	 */
	simple_lock(&x86_topo_lock);
	mp_rv_ncpus = i386_active_cpus();
	i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
	simple_unlock(&x86_topo_lock);

	/* call executor function on this cpu */
	mp_rendezvous_action();

	/*
	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
	 */
	while (mp_rv_complete < mp_rv_ncpus) {

	}

	/* Tidy up. */
	mp_rv_setup_func = NULL;
	mp_rv_action_func = NULL;
	mp_rv_teardown_func = NULL;
	mp_rv_func_arg = NULL;

	/* release lock */
	simple_unlock(&mp_rv_lock);
}
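/*
 * Illustrative sketch (disabled, not part of the build): a typical
 * mp_rendezvous() call with no setup or teardown function. The names are
 * hypothetical; the action runs on every running CPU between the entry and
 * exit barriers and must be reentrant, as noted above.
 */
#if 0
static void
example_rendezvous_action(__unused void *arg)
{
	kprintf("rendezvous action on cpu %d\n", cpu_number());
}

static void
example_rendezvous(void)
{
	mp_rendezvous(NULL,				/* no setup */
		      example_rendezvous_action,	/* run on all cpus */
		      NULL,				/* no teardown */
		      NULL);
}
#endif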
void
mp_rendezvous_break_lock(void)
{
	simple_lock_init(&mp_rv_lock, 0);
}
static void
setup_disable_intrs(__unused void * param_not_used)
{
	/* disable interrupts before the first barrier */
	boolean_t intr = ml_set_interrupts_enabled(FALSE);

	current_cpu_datap()->cpu_iflag = intr;
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}
static void
teardown_restore_intrs(__unused void * param_not_used)
{
	/* restore interrupt flag following MTRR changes */
	ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}
/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
void
mp_rendezvous_no_intrs(
	      void (*action_func)(void *),
	      void *arg)
{
	mp_rendezvous(setup_disable_intrs,
		      action_func,
		      teardown_restore_intrs,
		      arg);
}
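/*
 * Illustrative sketch (disabled, not part of the build): how a kext might
 * use the exported mp_rendezvous_no_intrs() to run one function on every
 * CPU with interrupts masked, e.g. around MTRR-style per-CPU programming.
 * The function names are hypothetical.
 */
#if 0
static void
example_no_intrs_action(__unused void *arg)
{
	/* Interrupts are already disabled here by setup_disable_intrs(). */
	/* A real caller would program a per-CPU resource at this point.  */
}

static void
example_program_all_cpus(void)
{
	mp_rendezvous_no_intrs(example_no_intrs_action, NULL);
}
#endif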
void
handle_pending_TLB_flushes(void)
{
	volatile int	*my_word = &current_cpu_datap()->cpu_signals;

	if (i_bit(MP_TLB_FLUSH, my_word)) {
		DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH);
		i_bit_clear(MP_TLB_FLUSH, my_word);
		pmap_update_interrupt();
	}
}
/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 */
static void
mp_cpus_call_action(void)
{
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);
	atomic_incl(&mp_rv_complete, 1);
}
/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * If the mode is SYNC, the function is called serially on the target cpus
 * in logical cpu order. If the mode is ASYNC, the function is called in
 * parallel over the specified cpus.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * Return does not occur until the function has completed on all cpus.
 * The return value is the number of cpus on which the function was called.
 */
cpu_t
mp_cpus_call(
	cpumask_t	cpus,
	mp_sync_t	mode,
	void		(*action_func)(void *),
	void		*arg)
{
	cpu_t		cpu;
	boolean_t	intrs_enabled = ml_get_interrupts_enabled();
	boolean_t	call_self = FALSE;

	if (!smp_initialized) {
		if ((cpus & CPUMASK_SELF) == 0)
			return 0;
		if (action_func != NULL) {
			(void) ml_set_interrupts_enabled(FALSE);
			action_func(arg);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		return 1;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* Use the rendezvous data structures for this call */
	mp_rv_action_func = action_func;
	mp_rv_func_arg = arg;

	simple_lock(&x86_topo_lock);
	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
		    !cpu_datap(cpu)->cpu_running)
			continue;
		if (cpu == (cpu_t) cpu_number()) {
			/*
			 * We don't IPI ourself and if calling asynchronously,
			 * we defer our call until we have signalled all others.
			 */
			call_self = TRUE;
			if (mode == SYNC && action_func != NULL) {
				(void) ml_set_interrupts_enabled(FALSE);
				action_func(arg);
				ml_set_interrupts_enabled(intrs_enabled);
			}
		} else {
			/*
			 * Bump count of other cpus called and signal this cpu.
			 * Note: we signal asynchronously regardless of mode
			 * because we wait on mp_rv_complete either here
			 * (if mode == SYNC) or later (if mode == ASYNC).
			 * While spinning, poll for TLB flushes if interrupts
			 * are disabled.
			 */
			mp_rv_ncpus++;
			i386_signal_cpu(cpu, MP_CALL, ASYNC);
			if (mode == SYNC) {
				simple_unlock(&x86_topo_lock);
				while (mp_rv_complete < mp_rv_ncpus) {
					if (!intrs_enabled)
						handle_pending_TLB_flushes();
				}
				simple_lock(&x86_topo_lock);
			}
		}
	}
	simple_unlock(&x86_topo_lock);

	/*
	 * If calls are being made asynchronously,
	 * make the local call now if needed, and then
	 * wait for all other cpus to finish their calls.
	 */
	if (call_self && action_func != NULL) {
		(void) ml_set_interrupts_enabled(FALSE);
		action_func(arg);
		ml_set_interrupts_enabled(intrs_enabled);
	}
	while (mp_rv_complete < mp_rv_ncpus) {
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
	}

	/* Determine the number of cpus called */
	cpu = mp_rv_ncpus + (call_self ? 1 : 0);

	simple_unlock(&mp_rv_lock);

	return cpu;
}
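/*
 * Illustrative sketch (disabled, not part of the build): calling
 * mp_cpus_call() synchronously on every running CPU, including the caller.
 * The action function name is hypothetical, and CPUMASK_ALL is assumed to
 * be defined alongside the CPUMASK_SELF constant used above.
 */
#if 0
static void
example_call_action(__unused void *arg)
{
	kprintf("mp_cpus_call action on cpu %d\n", cpu_number());
}

static void
example_call_all_cpus(void)
{
	cpu_t ncalled;

	ncalled = mp_cpus_call(CPUMASK_ALL, SYNC, example_call_action, NULL);
	kprintf("action ran on %d cpus\n", (int) ncalled);
}
#endif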
static void
mp_broadcast_action(void)
{
	/* call action function */
	if (mp_bc_action_func != NULL)
		mp_bc_action_func(mp_bc_func_arg);

	/* if we're the last one through, wake up the instigator */
	if (atomic_decl_and_test((volatile long *)&mp_bc_count, 1))
		thread_wakeup(((event_t)(unsigned int *) &mp_bc_count));
}
/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
void
mp_broadcast(
	 void (*action_func)(void *),
	 void *arg)
{
	if (!smp_initialized) {
		if (action_func != NULL)
			action_func(arg);
		return;
	}

	/* obtain broadcast lock */
	mutex_lock(&mp_bc_lock);

	/* set static function pointers */
	mp_bc_action_func = action_func;
	mp_bc_func_arg = arg;

	assert_wait(&mp_bc_count, THREAD_UNINT);

	/*
	 * signal other processors, which will call mp_broadcast_action()
	 */
	simple_lock(&x86_topo_lock);
	mp_bc_ncpus = i386_active_cpus();	/* total including this cpu */
	mp_bc_count = mp_bc_ncpus;
	i386_signal_cpus(MP_BROADCAST, ASYNC);

	/* call executor function on this cpu */
	mp_broadcast_action();
	simple_unlock(&x86_topo_lock);

	/* block for all cpus to have run action_func */
	if (mp_bc_ncpus > 1)
		thread_block(THREAD_CONTINUE_NULL);
	else
		clear_wait(current_thread(), THREAD_AWAKENED);

	/* release lock */
	mutex_unlock(&mp_bc_lock);
}
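/*
 * Illustrative sketch (disabled, not part of the build): using
 * mp_broadcast() to run an action once on every active CPU and block until
 * all have finished. The counter and function names are hypothetical.
 */
#if 0
static volatile long example_bc_visits;

static void
example_broadcast_action(__unused void *arg)
{
	atomic_incl(&example_bc_visits, 1);
}

static void
example_broadcast_all_cpus(void)
{
	example_bc_visits = 0;
	/* Returns only after every active CPU has run the action. */
	mp_broadcast(example_broadcast_action, NULL);
}
#endif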
void
i386_activate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	if (!smp_initialized) {
		cdp->cpu_running = TRUE;
		return;
	}

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = TRUE;
	simple_unlock(&x86_topo_lock);
}
extern void etimer_timer_expire(void	*arg);

void
i386_deactivate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = FALSE;
	simple_unlock(&x86_topo_lock);

	timer_queue_shutdown(&cdp->rtclock_timer.queue);
	cdp->rtclock_timer.deadline = EndOfAllTime;
	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);

	/*
	 * In case a rendezvous/broadcast/call was initiated to this cpu
	 * before we cleared cpu_running, we must perform any actions due.
	 */
	if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
		mp_rendezvous_action();
	if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
		mp_broadcast_action();
	if (i_bit(MP_CALL, &cdp->cpu_signals))
		mp_cpus_call_action();
	cdp->cpu_signals = 0;			/* all clear */
}
int	pmsafe_debug = 1;

#if	MACH_KDP
volatile boolean_t	mp_kdp_trap = FALSE;
volatile unsigned long	mp_kdp_ncpus;
boolean_t		mp_kdp_state;
void
mp_kdp_enter(void)
{
	unsigned int	cpu;
	unsigned int	ncpus;
	unsigned int	my_cpu;
	uint64_t	tsc_timeout;

	DBG("mp_kdp_enter()\n");

	/*
	 * Here to enter the debugger.
	 * In case of races, only one cpu is allowed to enter kdp after
	 * stopping others.
	 */
	mp_kdp_state = ml_set_interrupts_enabled(FALSE);
	simple_lock(&mp_kdp_lock);

	if (pmsafe_debug)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

	while (mp_kdp_trap) {
		simple_unlock(&mp_kdp_lock);
		DBG("mp_kdp_enter() race lost\n");

		simple_lock(&mp_kdp_lock);
	}
	my_cpu = cpu_number();
	debugger_cpu = my_cpu;
	mp_kdp_ncpus = 1;	/* self */
	mp_kdp_trap = TRUE;
	simple_unlock(&mp_kdp_lock);

	/*
	 * Deliver a nudge to other cpus, counting how many
	 */
	DBG("mp_kdp_enter() signaling other processors\n");
	if (force_immediate_debugger_NMI == FALSE) {
		for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			ncpus++;
			i386_signal_cpu(cpu, MP_KDP, ASYNC);
		}
		/*
		 * Wait for other processors to synchronize.
		 */
		DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

		/*
		 * This timeout is rather arbitrary; we don't want to NMI
		 * processors that are executing at potentially
		 * "unsafe-to-interrupt" points such as the trampolines,
		 * but neither do we want to lose state by waiting too long.
		 */
		tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000);

		while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
			/*
			 * A TLB shootdown request may be pending--this would
			 * result in the requesting processor waiting in
			 * PMAP_UPDATE_TLBS() until this processor deals with it.
			 * Process it, so it can now enter mp_kdp_wait().
			 */
			handle_pending_TLB_flushes();
		}

		/* If we've timed out, and some processor(s) are still unresponsive,
		 * interrupt them with an NMI via the local APIC.
		 */
		if (mp_kdp_ncpus != ncpus) {
			for (cpu = 0; cpu < real_ncpus; cpu++) {
				if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
					continue;
				if (cpu_signal_pending(cpu, MP_KDP))
					cpu_NMI_interrupt(cpu);
			}
		}
	} else {
		for (cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			cpu_NMI_interrupt(cpu);
		}
	}

	DBG("mp_kdp_enter() %u processors done %s\n",
		mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");

	postcode(MP_KDP_ENTER);
}
static boolean_t
cpu_signal_pending(int cpu, mp_event_t event)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	boolean_t	retval = FALSE;

	if (i_bit(event, signals))
		retval = TRUE;
	return retval;
}
static void
mp_kdp_wait(boolean_t flush)
{
	DBG("mp_kdp_wait()\n");
	/* If an I/O port has been specified as a debugging aid, issue a read */
	panic_io_port_read();

	/* If we've trapped due to a machine-check, save MCA registers */

	if (pmsafe_debug)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

	atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
	while (mp_kdp_trap) {
		/*
		 * A TLB shootdown request may be pending--this would result
		 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
		 * until this processor handles it.
		 * Process it, so it can now enter mp_kdp_wait().
		 */
		if (flush)
			handle_pending_TLB_flushes();
	}

	if (pmsafe_debug)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);

	atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
	DBG("mp_kdp_wait() done\n");
}
void
mp_kdp_exit(void)
{
	DBG("mp_kdp_exit()\n");

	atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
	mp_kdp_trap = FALSE;
	__asm__ volatile("mfence");

	/* Wait for other processors to stop spinning. XXX needs timeout */
	DBG("mp_kdp_exit() waiting for processors to resume\n");
	while (mp_kdp_ncpus > 0) {
		/*
		 * A TLB shootdown request may be pending... this would result in the requesting
		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
		 * Process it, so it can now enter mp_kdp_wait().
		 */
		handle_pending_TLB_flushes();
	}

	if (pmsafe_debug)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);

	DBG("mp_kdp_exit() done\n");
	(void) ml_set_interrupts_enabled(mp_kdp_state);
}
#endif	/* MACH_KDP */
	__unused processor_t processor)
{
}

void
cause_ast_check(
	processor_t	processor)
{
	int	cpu = processor->cpu_num;

	if (cpu != cpu_number()) {
		i386_signal_cpu(cpu, MP_AST, ASYNC);
	}
}
#if	MACH_KDB
/*
 * invoke kdb on slave processors
 */
void
remote_kdb(void)
{
	unsigned int	my_cpu = cpu_number();
	unsigned int	cpu;
	int		kdb_ncpus;
	uint64_t	tsc_timeout = 0;

	for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		kdb_ncpus++;
		i386_signal_cpu(cpu, MP_KDB, ASYNC);
	}
	DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus);

	tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000);

	while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) {
		/*
		 * A TLB shootdown request may be pending... this would result in the requesting
		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
		 * Process it, so it can now enter mp_kdb_wait().
		 */
		handle_pending_TLB_flushes();
	}
	DBG("remote_kdb() %d processors done %s\n",
		mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out");
}
static void
mp_kdb_wait(void)
{
	DBG("mp_kdb_wait()\n");

	/* If an I/O port has been specified as a debugging aid, issue a read */
	panic_io_port_read();

	atomic_incl(&mp_kdb_ncpus, 1);
	while (mp_kdb_trap) {
		/*
		 * A TLB shootdown request may be pending... this would result in the requesting
		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
		 * Process it, so it can now enter mp_kdb_wait().
		 */
		handle_pending_TLB_flushes();
	}

	atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
	DBG("mp_kdb_wait() done\n");
}
/*
 * Clear kdb interrupt.
 */
void
clear_kdb_intr(void)
{
	mp_disable_preemption();
	i_bit_clear(MP_KDB, &current_cpu_datap()->cpu_signals);
	mp_enable_preemption();
}
void
mp_kdb_exit(void)
{
	DBG("mp_kdb_exit()\n");
	atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
	mp_kdb_trap = FALSE;
	__asm__ volatile("mfence");

	while (mp_kdb_ncpus > 0) {
		/*
		 * A TLB shootdown request may be pending... this would result in the requesting
		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
		 * Process it, so it can now enter mp_kdb_wait().
		 */
		handle_pending_TLB_flushes();
	}

	DBG("mp_kdb_exit() done\n");
}

#endif	/* MACH_KDB */
static void
do_init_slave(boolean_t fast_restart)
{
	void	*init_param = FULL_SLAVE_INIT;

	postcode(I386_INIT_SLAVE);

	if (!fast_restart) {
		/* Ensure that caching and write-through are enabled */
		set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));

		DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
			get_cpu_number(), get_cpu_phys_number());

		assert(!ml_get_interrupts_enabled());

		cpu_mode_init(current_cpu_datap());

		LAPIC_CPU_MAP_DUMP();

	} else
		init_param = FAST_SLAVE_INIT;

	/* resume VT operation */

	cpu_thread_init();	/* not strictly necessary */

	cpu_init();	/* Sets cpu_running which starter cpu waits for */

	slave_main(init_param);

	panic("do_init_slave() returned from slave_main()");
}
/*
 * i386_init_slave() is called from pstart.
 * We're in the cpu's interrupt stack with interrupts disabled.
 * At this point we are in legacy mode. We need to switch on IA32e
 * if the mode is set to 64-bits.
 */
void
i386_init_slave(void)
{
	do_init_slave(FALSE);
}

/*
 * i386_init_slave_fast() is called from pmCPUHalt.
 * We're running on the idle thread and need to fix up
 * some accounting and get it so that the scheduler sees this
 * CPU again.
 */
void
i386_init_slave_fast(void)
{
	do_init_slave(TRUE);
}
void
slave_machine_init(void *param)
{
	/*
	 * Here in process context, but with interrupts disabled.
	 */
	DBG("slave_machine_init() CPU%d\n", get_cpu_number());

	if (param == FULL_SLAVE_INIT) {

	}
	cpu_machine_init();	/* Interrupts enabled hereafter */
}
int cpu_number(void)
{
	return get_cpu_number();
}
#if	MACH_KDB
#include <ddb/db_output.h>

#define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */

#if	TRAP_DEBUG

struct mp_trap_hist_struct {
	unsigned char	type;
	unsigned char	data[5];
} trap_hist[MTRAPS], *cur_trap_hist = trap_hist,
  *max_trap_hist = &trap_hist[MTRAPS];

void db_trap_hist(void);

void
db_trap_hist(void)
{
	int i, j;

	for(i=0;i<MTRAPS;i++)
		if (trap_hist[i].type == 1 || trap_hist[i].type == 2) {
			db_printf("%s%s",
				  (&trap_hist[i]>=cur_trap_hist)?"*":" ",
				  (trap_hist[i].type == 1)?"SPL":"INT");
			for(j=0;j<5;j++)
				db_printf(" %02x", trap_hist[i].data[j]);
		}
}

#endif	/* TRAP_DEBUG */
#endif	/* MACH_KDB */