/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_ldebug.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/timer_call.h>
#include <kern/kalloc.h>
#include <kern/queue.h>
#include <prng/random.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <profiling/profile-mk.h>

#include <i386/bit_routines.h>
#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/machine_cpu.h>
#include <i386/pmCPU.h>
#include <i386/machine_check.h>
#include <i386/acpi.h>

#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>

#include <sys/kdebug.h>

#include <console/serial_protos.h>
#define PAUSE       delay(1000000)
#define DBG(x...)   kprintf(x)

/* Debugging/test trace events: */
#define TRACE_MP_TLB_FLUSH          MACHDBG_CODE(DBG_MACH_MP, 0)
#define TRACE_MP_CPUS_CALL          MACHDBG_CODE(DBG_MACH_MP, 1)
#define TRACE_MP_CPUS_CALL_LOCAL    MACHDBG_CODE(DBG_MACH_MP, 2)
#define TRACE_MP_CPUS_CALL_ACTION   MACHDBG_CODE(DBG_MACH_MP, 3)
#define TRACE_MP_CPUS_CALL_NOBUF    MACHDBG_CODE(DBG_MACH_MP, 4)
#define TRACE_MP_CPU_FAST_START     MACHDBG_CODE(DBG_MACH_MP, 5)
#define TRACE_MP_CPU_START          MACHDBG_CODE(DBG_MACH_MP, 6)
#define TRACE_MP_CPU_DEACTIVATE     MACHDBG_CODE(DBG_MACH_MP, 7)

#define ABS(v)      (((v) > 0)?(v):-(v))
void        slave_boot_init(void);
void        i386_cpu_IPI(int cpu);

static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
#endif /* MACH_KDP */
static void mp_rendezvous_action(void);
static void mp_broadcast_action(void);

static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
#endif /* MACH_KDP */
static int  NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t           smp_initialized = FALSE;
uint32_t            TSC_sync_margin = 0xFFF;
volatile boolean_t  force_immediate_debugger_NMI = FALSE;
volatile boolean_t  pmap_tlb_flush_timeout = FALSE;
decl_simple_lock_data(,mp_kdp_lock);

decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t   mp_cpu_boot_lock_ext;

/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void     (*mp_rv_setup_func)(void *arg);
static void     (*mp_rv_action_func)(void *arg);
static void     (*mp_rv_teardown_func)(void *arg);
static void     *mp_rv_func_arg;
static volatile int mp_rv_ncpus;
            /* Cache-aligned barriers: */
static volatile long    mp_rv_entry    __attribute__((aligned(64)));
static volatile long    mp_rv_exit     __attribute__((aligned(64)));
static volatile long    mp_rv_complete __attribute__((aligned(64)));

volatile uint64_t   debugger_entry_time;
volatile uint64_t   debugger_exit_time;
extern int kdp_snapshot;
static struct _kdp_xcpu_call_func {
    kdp_x86_xcpu_func_t func;
    volatile uint16_t   cpu;
} kdp_xcpu_call_func = {

/* Variables needed for MP broadcast. */
static void     (*mp_bc_action_func)(void *arg);
static void     *mp_bc_func_arg;
static int      mp_bc_ncpus;
static volatile long    mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t   mp_bc_lock_ext;
static volatile int debugger_cpu = -1;
volatile long   NMIPI_acks = 0;
volatile long   NMI_count = 0;

extern void NMI_cpus(void);

static void mp_cpus_call_init(void);
static void mp_cpus_call_action(void);
static void mp_call_PM(void);

static boolean_t mp_cpus_call_wait_timeout = FALSE;

char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));  // Temp stack for slave init
/* PAL-related routines */
boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
        int ipi_vector, i386_intr_func_t ipi_handler);
void i386_start_cpu(int lapic_id, int cpu_num);
void i386_send_NMI(int cpu);

/*
 * Initialize dummy structs for profiling. These aren't used but
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT()                                                    \
    /* Hack to initialize pointers to unused profiling structs */      \
    for (i = 1; i < MAX_CPUS; i++)                                      \
        _profile_vars_cpus[i] = &_profile_vars;                         \
static lck_grp_t        smp_lck_grp;
static lck_grp_attr_t   smp_lck_grp_attr;

#define NUM_CPU_WARM_CALLS  20
struct timer_call   cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
queue_head_t        cpu_warm_call_list;
decl_simple_lock_data(static, cpu_warm_lock);

typedef struct cpu_warm_data {
    timer_call_t    cwd_call;
    uint64_t        cwd_deadline;

static void cpu_prewarm_init(void);
static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
static void _cpu_warm_setup(void *arg);
static timer_call_t grab_warm_timer_call(void);
static void free_warm_timer_call(timer_call_t call);
    simple_lock_init(&mp_kdp_lock, 0);
    simple_lock_init(&mp_rv_lock, 0);
    lck_grp_attr_setdefault(&smp_lck_grp_attr);
    lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
    lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
    lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);

    if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
                       LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))

    DBGLOG_CPU_INIT(master_cpu);

    mp_cpus_call_cpu_init(master_cpu);

    if (PE_parse_boot_argn("TSC_sync_margin",
                           &TSC_sync_margin, sizeof(TSC_sync_margin))) {
        kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
    } else if (cpuid_vmm_present()) {
        kprintf("TSC sync margin disabled\n");

    smp_initialized = TRUE;
} processor_start_info_t;
static processor_start_info_t   start_info  __attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long        tsc_entry_barrier   __attribute__((aligned(64)));
static volatile long        tsc_exit_barrier    __attribute__((aligned(64)));
static volatile uint64_t    tsc_target          __attribute__((aligned(64)));

/*
 * Poll a CPU to see when it has marked itself as running.
 */
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
    while (iters-- > 0) {
        if (cpu_datap(slot_num)->cpu_running)

/*
 * Quickly bring a CPU back online which has been halted.
 */
intel_startCPU_fast(int slot_num)
    /* Try to perform a fast restart */
    rc = pmCPUExitHalt(slot_num);
    if (rc != KERN_SUCCESS)
        /* The CPU was not eligible for a fast restart. */

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
        slot_num, 0, 0, 0, 0);

    /* Wait until the CPU is back online. */
    mp_disable_preemption();
    /*
     * We use short pauses (1us) for low latency.  30,000 iterations is
     * longer than a full restart would require so it should be more
     */
    mp_wait_for_cpu_up(slot_num, 30000, 1);
    mp_enable_preemption();

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
        slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);

    /*
     * Check to make sure that the CPU is really running.  If not,
     * go through the slow path.
     */
    if (cpu_datap(slot_num)->cpu_running)
        return(KERN_SUCCESS);

    return(KERN_FAILURE);
    /* Here on the started cpu with cpu_running set TRUE */

    if (TSC_sync_margin &&
        start_info.target_cpu == cpu_number()) {
        /*
         * I've just started-up, synchronize again with the starter cpu
         * and then snap my TSC.
         */
        atomic_decl(&tsc_entry_barrier, 1);
        while (tsc_entry_barrier != 0)
            ;   /* spin for starter and target at barrier */
        tsc_target = rdtsc64();
        atomic_decl(&tsc_exit_barrier, 1);

    processor_start_info_t *psip = (processor_start_info_t *) arg;

    /* Ignore this if the current processor is not the starter */
    if (cpu_number() != psip->starter_cpu)

    DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
        arg, psip->target_cpu, psip->target_lapic);

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_START | DBG_FUNC_START,
        psip->target_lapic, 0, 0, 0);

    i386_start_cpu(psip->target_lapic, psip->target_cpu);

#ifdef  POSTCODE_DELAY
    /* Wait much longer if postcodes are displayed for a delay period. */

    DBG("start_cpu(%p) about to wait for cpu %d\n",
        arg, psip->target_cpu);

    mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_START | DBG_FUNC_END,
        cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);

    if (TSC_sync_margin &&
        cpu_datap(psip->target_cpu)->cpu_running) {
        /*
         * Compare the TSC from the started processor with ours.
         * Report and log/panic if it diverges by more than
         * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
         * can be overridden by boot-arg (with 0 meaning no checking).
         */
        uint64_t    tsc_starter;

        atomic_decl(&tsc_entry_barrier, 1);
        while (tsc_entry_barrier != 0)
            ;   /* spin for both processors at barrier */
        tsc_starter = rdtsc64();
        atomic_decl(&tsc_exit_barrier, 1);
        while (tsc_exit_barrier != 0)
            ;   /* spin for target to store its TSC */
        tsc_delta = tsc_target - tsc_starter;
        kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
            psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
        if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
                "Unsynchronized TSC for cpu %d: "
                "0x%016llx, delta 0x%llx\n",
                psip->target_cpu, tsc_target, tsc_delta);
    int lapic = cpu_to_lapic[slot_num];

    DBGLOG_CPU_INIT(slot_num);

    DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
    DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);

    /*
     * Initialize (or re-initialize) the descriptor tables for this cpu.
     * Propagate processor mode to slave.
     */
    cpu_desc_init64(cpu_datap(slot_num));

    /* Serialize use of the slave boot stack, etc. */
    lck_mtx_lock(&mp_cpu_boot_lock);

    istate = ml_set_interrupts_enabled(FALSE);
    if (slot_num == get_cpu_number()) {
        ml_set_interrupts_enabled(istate);
        lck_mtx_unlock(&mp_cpu_boot_lock);

    start_info.starter_cpu  = cpu_number();
    start_info.target_cpu   = slot_num;
    start_info.target_lapic = lapic;
    tsc_entry_barrier = 2;
    tsc_exit_barrier = 2;

    /*
     * Perform the processor startup sequence with all running
     * processors rendezvous'ed. This is required during periods when
     * the cache-disable bit is set for MTRR/PAT initialization.
     */
    mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

    start_info.target_cpu = 0;

    ml_set_interrupts_enabled(istate);
    lck_mtx_unlock(&mp_cpu_boot_lock);

    if (!cpu_datap(slot_num)->cpu_running) {
        kprintf("Failed to start CPU %02d\n", slot_num);
        printf("Failed to start CPU %02d, rebooting...\n", slot_num);

        kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
cpu_signal_event_log_t  *cpu_signal[MAX_CPUS];
cpu_signal_event_log_t  *cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif  /* MP_DEBUG */

/*
 * Note: called with NULL state when polling for TLB flush and cross-calls.
 */
cpu_signal_handler(x86_saved_state_t *regs)
#pragma unused (regs)
#endif /* !MACH_KDP */
    volatile int    *my_word;

    SCHED_STATS_IPI(current_processor());

    my_cpu = cpu_number();
    my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
    /* Store the initial set of signals for diagnostics. New
     * signals could arrive while these are being processed
     * so it's no more than a hint.
     */
    cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;

        if (i_bit(MP_KDP, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_KDP);
            i_bit_clear(MP_KDP, my_word);
            /* Ensure that the i386_kernel_state at the base of the
             * current thread's stack (if any) is synchronized with the
             * context at the moment of the interrupt, to facilitate
             * access through the debugger.
             */
            sync_iss_to_iks(regs);
            if (pmsafe_debug && !kdp_snapshot)
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
            mp_kdp_wait(TRUE, FALSE);
            if (pmsafe_debug && !kdp_snapshot)
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
#endif  /* MACH_KDP */
        if (i_bit(MP_TLB_FLUSH, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
            i_bit_clear(MP_TLB_FLUSH, my_word);
            pmap_update_interrupt();
        } else if (i_bit(MP_RENDEZVOUS, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
            i_bit_clear(MP_RENDEZVOUS, my_word);
            mp_rendezvous_action();
        } else if (i_bit(MP_BROADCAST, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
            i_bit_clear(MP_BROADCAST, my_word);
            mp_broadcast_action();
        } else if (i_bit(MP_CHUD, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_CHUD);
            i_bit_clear(MP_CHUD, my_word);
            chudxnu_cpu_signal_handler();
        } else if (i_bit(MP_CALL, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_CALL);
            i_bit_clear(MP_CALL, my_word);
            mp_cpus_call_action();
        } else if (i_bit(MP_CALL_PM, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
            i_bit_clear(MP_CALL_PM, my_word);
            /* Called to poll only for cross-calls and TLB flush */
        } else if (i_bit(MP_AST, my_word)) {
            DBGLOG(cpu_handle,my_cpu,MP_AST);
            i_bit_clear(MP_AST, my_word);
            ast_check(cpu_to_processor(my_cpu));
extern void kprintf_break_lock(void);

NMIInterruptHandler(x86_saved_state_t *regs)
    if (panic_active() && !panicDebugging) {
            pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

    atomic_incl(&NMIPI_acks, 1);
    atomic_incl(&NMI_count, 1);
    sync_iss_to_iks_unconditionally(regs);
    __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));

    if (cpu_number() == debugger_cpu)

    if (spinlock_timed_out) {
        snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    } else if (mp_cpus_call_wait_timeout) {
        snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, this CPU timed-out during cross-call\n", cpu_number());
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    } else if (pmap_tlb_flush_timeout == TRUE) {
        snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
        panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);

    if (pmsafe_debug && !kdp_snapshot)
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
    current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
    i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
    if (pmap_tlb_flush_timeout ||
        spinlock_timed_out ||
        mp_cpus_call_wait_timeout ||
        mp_kdp_wait(FALSE, TRUE);
    } else if (virtualized && (debug_boot_arg & DB_NMI)) {
        /*
         * Under a VMM with the debug boot-arg set, drop into kdp.
         * Since an NMI is involved, there's a risk of contending with
         * a panic. And side-effects of NMIs may result in entry into,
         * and continuing from, the debugger being unreliable.
         */
        kprintf_break_lock();
        kprintf("Debugger entry requested by NMI\n");
        kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
        printf("Debugger entry requested by NMI\n");
        mp_kdp_wait(FALSE, FALSE);
    if (pmsafe_debug && !kdp_snapshot)
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI. If an
 * IPI is always needed then use i386_cpu_IPI.
 */
cpu_interrupt(int cpu)
    boolean_t did_IPI = FALSE;

        && pmCPUExitIdle(cpu_datap(cpu))) {

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);

/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
cpu_NMI_interrupt(int cpu)
    if (smp_initialized) {

    boolean_t   intrs_enabled;
    uint64_t    tsc_timeout;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);

    for (cpu = 0; cpu < real_ncpus; cpu++) {
        if (!cpu_datap(cpu)->cpu_running)
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
        cpu_NMI_interrupt(cpu);
        tsc_timeout = !machine_timeout_suspended() ?
                rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
        while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
            handle_pending_TLB_flushes();
            if (rdtsc64() > tsc_timeout)
                panic("NMI_cpus() timeout cpu %d", cpu);
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;

    ml_set_interrupts_enabled(intrs_enabled);

static void (* volatile mp_PM_func)(void) = NULL;

    assert(!ml_get_interrupts_enabled());

    if (mp_PM_func != NULL)

cpu_PM_interrupt(int cpu)
    assert(!ml_get_interrupts_enabled());

    if (mp_PM_func != NULL) {
        if (cpu == cpu_number())
            i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);

PM_interrupt_register(void (*fn)(void))
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
    volatile int    *signals = &cpu_datap(cpu)->cpu_signals;
    uint64_t        tsc_timeout;

    if (!cpu_datap(cpu)->cpu_running)

    if (event == MP_TLB_FLUSH)
        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);

    DBGLOG(cpu_signal, cpu, event);

    i_bit_set(event, signals);

        tsc_timeout = !machine_timeout_suspended() ?
                rdtsc64() + (1000*1000*1000) :
        while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
        if (i_bit(event, signals)) {
            DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
    if (event == MP_TLB_FLUSH)
        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);

/*
 * Send event to all running cpus.
 * Called with the topology locked.
 */
i386_signal_cpus(mp_event_t event, mp_sync_t mode)
    unsigned int    my_cpu = cpu_number();

    assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

    for (cpu = 0; cpu < real_ncpus; cpu++) {
        if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
        i386_signal_cpu(cpu, event, mode);

/*
 * Return the number of running cpus.
 * Called with the topology locked.
 */
i386_active_cpus(void)
    unsigned int    ncpus = 0;

    assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

    for (cpu = 0; cpu < real_ncpus; cpu++) {
        if (cpu_datap(cpu)->cpu_running)
/*
 * Helper function called when busy-waiting: panic if too long
 * a TSC-based time has elapsed since the start of the spin.
 */
mp_spin_timeout(uint64_t tsc_start)
    uint64_t    tsc_timeout;

    if (machine_timeout_suspended())

    /*
     * The timeout is 4 * the spinlock timeout period
     * unless we have serial console printing (kprintf) enabled
     * in which case we allow an even greater margin.
     */
    tsc_timeout = disable_serial_output ? (uint64_t) LockTimeOutTSC << 2
                                        : (uint64_t) LockTimeOutTSC << 4;
    return (rdtsc64() > tsc_start + tsc_timeout);
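
/*
 * Illustrative sketch of the busy-wait pattern built around
 * mp_spin_timeout(); the condition and panic string are hypothetical
 * and the block is not compiled.
 */
#if 0   /* example only */
    uint64_t example_spin_start = rdtsc64();
    while (!example_condition) {
        cpu_pause();
        if (mp_spin_timeout(example_spin_start))
            panic("example spin timed out");
    }
#endif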
/*
 * Helper function to take a spinlock while ensuring that incoming IPIs
 * are still serviced if interrupts are masked while we spin.
 */
mp_safe_spin_lock(usimple_lock_t lock)
    if (ml_get_interrupts_enabled()) {
        uint64_t tsc_spin_start = rdtsc64();
        while (!simple_lock_try(lock)) {
            cpu_signal_handler(NULL);
            if (mp_spin_timeout(tsc_spin_start)) {
                uintptr_t lowner = (uintptr_t)
                                   lock->interlock.lock_data;
                spinlock_timed_out = lock;
                lock_cpu = spinlock_timeout_NMI(lowner);
                panic("mp_safe_spin_lock() timed out,"
                      " lock: %p, owner thread: 0x%lx,"
                      " current_thread: %p, owner on CPU 0x%x",
                      current_thread(), lock_cpu);
/*
 * All-CPU rendezvous:
 *	- CPUs are signalled,
 *	- all execute the setup function (if specified),
 *	- rendezvous (i.e. all cpus reach a barrier),
 *	- all execute the action function (if specified),
 *	- rendezvous again,
 *	- execute the teardown function (if specified), and then
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
mp_rendezvous_action(void)
    boolean_t   intrs_enabled;
    uint64_t    tsc_spin_start;

    if (mp_rv_setup_func != NULL)
        mp_rv_setup_func(mp_rv_func_arg);

    intrs_enabled = ml_get_interrupts_enabled();

    /* spin on entry rendezvous */
    atomic_incl(&mp_rv_entry, 1);
    tsc_spin_start = rdtsc64();

    while (mp_rv_entry < mp_rv_ncpus) {
        /* poll for pesky tlb flushes if interrupts disabled */
            handle_pending_TLB_flushes();
        if (mp_spin_timeout(tsc_spin_start)) {
            panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());

    /* action function */
    if (mp_rv_action_func != NULL)
        mp_rv_action_func(mp_rv_func_arg);

    /* spin on exit rendezvous */
    atomic_incl(&mp_rv_exit, 1);
    tsc_spin_start = rdtsc64();
    while (mp_rv_exit < mp_rv_ncpus) {
            handle_pending_TLB_flushes();
        if (mp_spin_timeout(tsc_spin_start))
            panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());

    /* teardown function */
    if (mp_rv_teardown_func != NULL)
        mp_rv_teardown_func(mp_rv_func_arg);

    /* Bump completion count */
    atomic_incl(&mp_rv_complete, 1);
mp_rendezvous(void (*setup_func)(void *),
              void (*action_func)(void *),
              void (*teardown_func)(void *),
    uint64_t    tsc_spin_start;

    if (!smp_initialized) {
        if (setup_func != NULL)
        if (action_func != NULL)
        if (teardown_func != NULL)

    /* obtain rendezvous lock */
    (void) mp_safe_spin_lock(&mp_rv_lock);

    /* set static function pointers */
    mp_rv_setup_func = setup_func;
    mp_rv_action_func = action_func;
    mp_rv_teardown_func = teardown_func;
    mp_rv_func_arg = arg;

    /*
     * signal other processors, which will call mp_rendezvous_action()
     * with interrupts disabled
     */
    (void) mp_safe_spin_lock(&x86_topo_lock);
    mp_rv_ncpus = i386_active_cpus();
    i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
    simple_unlock(&x86_topo_lock);

    /* call executor function on this cpu */
    mp_rendezvous_action();

    /*
     * Spin for everyone to complete.
     * This is necessary to ensure that all processors have proceeded
     * from the exit barrier before we release the rendezvous structure.
     */
    tsc_spin_start = rdtsc64();
    while (mp_rv_complete < mp_rv_ncpus) {
        if (mp_spin_timeout(tsc_spin_start))
            panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());

    mp_rv_setup_func = NULL;
    mp_rv_action_func = NULL;
    mp_rv_teardown_func = NULL;
    mp_rv_func_arg = NULL;

    simple_unlock(&mp_rv_lock);

mp_rendezvous_break_lock(void)
    simple_lock_init(&mp_rv_lock, 0);

setup_disable_intrs(__unused void * param_not_used)
    /* disable interrupts before the first barrier */
    boolean_t intr = ml_set_interrupts_enabled(FALSE);

    current_cpu_datap()->cpu_iflag = intr;
    DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);

teardown_restore_intrs(__unused void * param_not_used)
    /* restore interrupt flag following MTRR changes */
    ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
    DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);

/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
mp_rendezvous_no_intrs(
    void (*action_func)(void *),
    mp_rendezvous(setup_disable_intrs,
                  teardown_restore_intrs,
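
/*
 * Illustrative sketch: one way a caller (e.g. a kext doing a cache or
 * MTRR/PAT update) might use mp_rendezvous_no_intrs(). The action routine
 * and its name are hypothetical; wbinvd() is assumed from <i386/proc_reg.h>.
 * The block is not compiled.
 */
#if 0   /* example only */
static void
example_wbinvd_action(__unused void *arg)
{
    /* Runs on every CPU, interrupts disabled, in lock-step. */
    wbinvd();
}

static void
example_flush_all_caches(void)
{
    mp_rendezvous_no_intrs(example_wbinvd_action, NULL);
}
#endif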
    queue_chain_t   link;                   /* queue linkage */
    void            (*func)(void *,void *); /* routine to call */
    void            *arg0;                  /* routine's 1st arg */
    void            *arg1;                  /* routine's 2nd arg */
    cpumask_t       *maskp;                 /* completion response mask */

    decl_simple_lock_data(, lock);

#define MP_CPUS_CALL_BUFS_PER_CPU   MAX_CPUS
static mp_call_queue_t  mp_cpus_call_freelist;
static mp_call_queue_t  mp_cpus_call_head[MAX_CPUS];

static inline boolean_t
mp_call_head_lock(mp_call_queue_t *cqp)
    boolean_t   intrs_enabled;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    simple_lock(&cqp->lock);

    return intrs_enabled;

mp_cpus_NMIPI(cpumask_t cpu_mask) {
    unsigned int cpu, cpu_bit;

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if (cpu_mask & cpu_bit)
            cpu_NMI_interrupt(cpu);
    deadline = mach_absolute_time() + (LockTimeOut);
    while (mach_absolute_time() < deadline)

static inline boolean_t
mp_call_head_is_locked(mp_call_queue_t *cqp)
    return !ml_get_interrupts_enabled() &&
           hw_lock_held((hw_lock_t)&cqp->lock);

mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
    simple_unlock(&cqp->lock);
    ml_set_interrupts_enabled(intrs_enabled);
static inline mp_call_t *
    mp_call_t       *callp = NULL;
    boolean_t       intrs_enabled;
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    intrs_enabled = mp_call_head_lock(cqp);
    if (!queue_empty(&cqp->queue))
        queue_remove_first(&cqp->queue, callp, typeof(callp), link);
    mp_call_head_unlock(cqp, intrs_enabled);

mp_call_free(mp_call_t *callp)
    boolean_t       intrs_enabled;
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    intrs_enabled = mp_call_head_lock(cqp);
    queue_enter_first(&cqp->queue, callp, typeof(callp), link);
    mp_call_head_unlock(cqp, intrs_enabled);

static inline mp_call_t *
mp_call_dequeue_locked(mp_call_queue_t *cqp)
    mp_call_t       *callp = NULL;

    assert(mp_call_head_is_locked(cqp));
    if (!queue_empty(&cqp->queue))
        queue_remove_first(&cqp->queue, callp, typeof(callp), link);

mp_call_enqueue_locked(
    mp_call_queue_t *cqp,
    queue_enter(&cqp->queue, callp, typeof(callp), link);

/* Called on the boot processor to initialize global structures */
mp_cpus_call_init(void)
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    DBG("mp_cpus_call_init()\n");
    simple_lock_init(&cqp->lock, 0);
    queue_init(&cqp->queue);

/*
 * Called at processor registration to add call buffers to the free list
 * and to initialize the per-cpu call queue.
 */
mp_cpus_call_cpu_init(int cpu)
    mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];

    simple_lock_init(&cqp->lock, 0);
    queue_init(&cqp->queue);
    for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
        callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
        mp_call_free(callp);

    DBG("mp_cpus_call_init(%d) done\n", cpu);
/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
 */
mp_cpus_call_action(void)
    mp_call_queue_t *cqp;
    boolean_t       intrs_enabled;

    assert(!ml_get_interrupts_enabled());
    cqp = &mp_cpus_call_head[cpu_number()];
    intrs_enabled = mp_call_head_lock(cqp);
    while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
        /* Copy call request to the stack to free buffer */
        mp_call_free(callp);
        if (call.func != NULL) {
            mp_call_head_unlock(cqp, intrs_enabled);
            KERNEL_DEBUG_CONSTANT(
                TRACE_MP_CPUS_CALL_ACTION,
                VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
                VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
            call.func(call.arg0, call.arg1);
            (void) mp_call_head_lock(cqp);
        if (call.maskp != NULL)
            i_bit_set(cpu_number(), call.maskp);
    mp_call_head_unlock(cqp, intrs_enabled);

/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * Possible modes are:
 *  SYNC:   function is called serially on target cpus in logical cpu order
 *          waiting for each call to be acknowledged before proceeding
 *  ASYNC:  function call is queued to the specified cpus
 *          waiting for all calls to complete in parallel before returning
 *  NOSYNC: function calls are queued
 *          but we return before confirmation of calls completing.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * The return value is the number of cpus on which the call was made or queued.
 * (A usage sketch follows the wrapper below.)
 */
    void (*action_func)(void *),
    return mp_cpus_call1(
               (void (*)(void *,void *))action_func,
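
/*
 * Illustrative sketch of the wrapper above: run a hypothetical handler on
 * all other online cpus and wait for their acknowledgements. CPUMASK_OTHERS
 * is assumed from <i386/mp.h>. The block is not compiled.
 */
#if 0   /* example only */
static void
example_poke(void *arg)
{
    kprintf("cpu %d poked, arg %p\n", cpu_number(), arg);
}

static void
example_poke_others(void)
{
    cpu_t ncalled;

    /* ASYNC: queue to every other cpu, then wait for all to respond. */
    ncalled = mp_cpus_call(CPUMASK_OTHERS, ASYNC, example_poke, NULL);
    kprintf("example_poke_others: reached %d cpus\n", ncalled);
}
#endif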
mp_cpus_call_wait(boolean_t intrs_enabled,
                  cpumask_t cpus_called,
                  cpumask_t *cpus_responded)
    mp_call_queue_t *cqp;
    uint64_t        tsc_spin_start;

    cqp = &mp_cpus_call_head[cpu_number()];

    tsc_spin_start = rdtsc64();
    while (*cpus_responded != cpus_called) {
        if (!intrs_enabled) {
            /* Sniffing w/o locking */
            if (!queue_empty(&cqp->queue))
                mp_cpus_call_action();
            cpu_signal_handler(NULL);
        if (mp_spin_timeout(tsc_spin_start)) {
            cpumask_t   cpus_unresponsive;

            mp_cpus_call_wait_timeout = TRUE;
            cpus_unresponsive = cpus_called & ~(*cpus_responded);
            mp_cpus_NMIPI(cpus_unresponsive);
            panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
    void (*action_func)(void *, void *),
    cpumask_t   *cpus_calledp,
    cpumask_t   *cpus_notcalledp)
    boolean_t   intrs_enabled = FALSE;
    boolean_t   call_self = FALSE;
    cpumask_t   cpus_called = 0;
    cpumask_t   cpus_notcalled = 0;
    cpumask_t   cpus_responded = 0;
    long        cpus_call_count = 0;
    uint64_t    tsc_spin_start;
    boolean_t   topo_lock;

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPUS_CALL | DBG_FUNC_START,
        cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));

    if (!smp_initialized) {
        if ((cpus & CPUMASK_SELF) == 0)
        if (action_func != NULL) {
            intrs_enabled = ml_set_interrupts_enabled(FALSE);
            action_func(arg0, arg1);
            ml_set_interrupts_enabled(intrs_enabled);

    /*
     * Queue the call for each non-local requested cpu.
     * This is performed under the topo lock to prevent changes to
     * cpus online state and to prevent concurrent rendezvouses --
     * although an exception is made if we're calling only the master
     * processor since that always remains active. Note: this exception
     * is expected for longterm timer nosync cross-calls to the master cpu.
     */
    mp_disable_preemption();
    intrs_enabled = ml_get_interrupts_enabled();
    topo_lock = (cpus != cpu_to_cpumask(master_cpu));
        ml_set_interrupts_enabled(FALSE);
        (void) mp_safe_spin_lock(&x86_topo_lock);
    for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
        if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
            !cpu_datap(cpu)->cpu_running)
        tsc_spin_start = rdtsc64();
        if (cpu == (cpu_t) cpu_number()) {
            /*
             * We don't IPI ourself and if calling asynchronously,
             * we defer our call until we have signalled all others.
             */
            if (mode == SYNC && action_func != NULL) {
                KERNEL_DEBUG_CONSTANT(
                    TRACE_MP_CPUS_CALL_LOCAL,
                    VM_KERNEL_UNSLIDE(action_func),
                    VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
                action_func(arg0, arg1);
            /*
             * Here to queue a call to cpu and IPI.
             * Spinning for request buffer unless NOSYNC.
             */
            mp_call_t       *callp = NULL;
            mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
            boolean_t       intrs_inner;

            callp = mp_call_alloc();
            intrs_inner = mp_call_head_lock(cqp);
            if (mode == NOSYNC) {
                if (callp == NULL) {
                    cpus_notcalled |= cpu_to_cpumask(cpu);
                    mp_call_head_unlock(cqp, intrs_inner);
                    KERNEL_DEBUG_CONSTANT(
                        TRACE_MP_CPUS_CALL_NOBUF,
                callp->maskp = NULL;
            if (callp == NULL) {
                mp_call_head_unlock(cqp, intrs_inner);
                KERNEL_DEBUG_CONSTANT(
                    TRACE_MP_CPUS_CALL_NOBUF,
                    /* Sniffing w/o locking */
                    if (!queue_empty(&cqp->queue))
                        mp_cpus_call_action();
                    handle_pending_TLB_flushes();
                    if (mp_spin_timeout(tsc_spin_start))
                        panic("mp_cpus_call1() timeout");
                callp->maskp = &cpus_responded;
            callp->func = action_func;
            mp_call_enqueue_locked(cqp, callp);
            cpus_called |= cpu_to_cpumask(cpu);
            i386_signal_cpu(cpu, MP_CALL, ASYNC);
            mp_call_head_unlock(cqp, intrs_inner);
                mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);

        simple_unlock(&x86_topo_lock);
    ml_set_interrupts_enabled(intrs_enabled);

    /* Call locally if mode not SYNC */
    if (mode != SYNC && call_self) {
        KERNEL_DEBUG_CONSTANT(
            TRACE_MP_CPUS_CALL_LOCAL,
            VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
        if (action_func != NULL) {
            ml_set_interrupts_enabled(FALSE);
            action_func(arg0, arg1);
            ml_set_interrupts_enabled(intrs_enabled);

    /* Safe to allow pre-emption now */
    mp_enable_preemption();

    /* For ASYNC, now wait for all signaled cpus to complete their calls */
        mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);

        cpus_called |= cpu_to_cpumask(cpu);

        *cpus_calledp = cpus_called;
    if (cpus_notcalledp)
        *cpus_notcalledp = cpus_notcalled;

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPUS_CALL | DBG_FUNC_END,
        cpus_call_count, cpus_called, cpus_notcalled, 0, 0);

    return (cpu_t) cpus_call_count;
mp_broadcast_action(void)
    /* call action function */
    if (mp_bc_action_func != NULL)
        mp_bc_action_func(mp_bc_func_arg);

    /* if we're the last one through, wake up the instigator */
    if (atomic_decl_and_test(&mp_bc_count, 1))
        thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));

/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
    void (*action_func)(void *),
    if (!smp_initialized) {
        if (action_func != NULL)

    /* obtain broadcast lock */
    lck_mtx_lock(&mp_bc_lock);

    /* set static function pointers */
    mp_bc_action_func = action_func;
    mp_bc_func_arg = arg;

    assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

    /*
     * signal other processors, which will call mp_broadcast_action()
     */
    simple_lock(&x86_topo_lock);
    mp_bc_ncpus = i386_active_cpus();   /* total including this cpu */
    mp_bc_count = mp_bc_ncpus;
    i386_signal_cpus(MP_BROADCAST, ASYNC);

    /* call executor function on this cpu */
    mp_broadcast_action();
    simple_unlock(&x86_topo_lock);

    /* block for all cpus to have run action_func */
    if (mp_bc_ncpus > 1)
        thread_block(THREAD_CONTINUE_NULL);
        clear_wait(current_thread(), THREAD_AWAKENED);

    lck_mtx_unlock(&mp_bc_lock);
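
/*
 * Illustrative sketch: mp_broadcast() used to run a hypothetical counting
 * action once on every active cpu, blocking until all have executed it.
 * The block is not compiled.
 */
#if 0   /* example only */
static volatile long example_visits;

static void
example_count_cpu(__unused void *arg)
{
    atomic_incl(&example_visits, 1);
}

static void
example_count_active_cpus(void)
{
    example_visits = 0;
    mp_broadcast(example_count_cpu, NULL); /* returns after all cpus ran it */
    kprintf("broadcast reached %ld cpus\n", example_visits);
}
#endif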
mp_cpus_kick(cpumask_t cpus)
    boolean_t   intrs_enabled = FALSE;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    mp_safe_spin_lock(&x86_topo_lock);

    for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
        if ((cpu == (cpu_t) cpu_number())
            || ((cpu_to_cpumask(cpu) & cpus) == 0)
            || (!cpu_datap(cpu)->cpu_running))

        lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));

    simple_unlock(&x86_topo_lock);
    ml_set_interrupts_enabled(intrs_enabled);

i386_activate_cpu(void)
    cpu_data_t  *cdp = current_cpu_datap();

    assert(!ml_get_interrupts_enabled());

    if (!smp_initialized) {
        cdp->cpu_running = TRUE;

    simple_lock(&x86_topo_lock);
    cdp->cpu_running = TRUE;
    simple_unlock(&x86_topo_lock);

i386_deactivate_cpu(void)
    cpu_data_t  *cdp = current_cpu_datap();

    assert(!ml_get_interrupts_enabled());

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,

    simple_lock(&x86_topo_lock);
    cdp->cpu_running = FALSE;
    simple_unlock(&x86_topo_lock);

    /*
     * Move all of this cpu's timers to the master/boot cpu,
     * and poke it in case there's a sooner deadline for it to schedule.
     */
    timer_queue_shutdown(&cdp->rtclock_timer.queue);
    mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);

    /*
     * Open an interrupt window
     * and ensure any pending IPI or timer is serviced
     */
    mp_disable_preemption();
    ml_set_interrupts_enabled(TRUE);

    while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
    /*
     * Ensure there's no remaining timer deadline set
     * - AICPM may have left one active.
     */

    ml_set_interrupts_enabled(FALSE);
    mp_enable_preemption();

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,

int pmsafe_debug    = 1;
volatile boolean_t      mp_kdp_trap = FALSE;
volatile unsigned long  mp_kdp_ncpus;
boolean_t               mp_kdp_state;

    unsigned int    ncpus = 0;
    unsigned int    my_cpu;
    uint64_t        tsc_timeout;

    DBG("mp_kdp_enter()\n");

    if (!smp_initialized)
        simple_lock_init(&mp_kdp_lock, 0);

    /*
     * Here to enter the debugger.
     * In case of races, only one cpu is allowed to enter kdp after
     */
    mp_kdp_state = ml_set_interrupts_enabled(FALSE);
    my_cpu = cpu_number();

    if (my_cpu == (unsigned) debugger_cpu) {
        kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");

    cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
    simple_lock(&mp_kdp_lock);

    if (pmsafe_debug && !kdp_snapshot)
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

    while (mp_kdp_trap) {
        simple_unlock(&mp_kdp_lock);
        DBG("mp_kdp_enter() race lost\n");
        mp_kdp_wait(TRUE, FALSE);
        simple_lock(&mp_kdp_lock);

    debugger_cpu = my_cpu;
    mp_kdp_ncpus = 1;   /* self */
    debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
    simple_unlock(&mp_kdp_lock);

    /*
     * Deliver a nudge to other cpus, counting how many
     */
    DBG("mp_kdp_enter() signaling other processors\n");
    if (force_immediate_debugger_NMI == FALSE) {
        for (cpu = 0; cpu < real_ncpus; cpu++) {
            if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
            i386_signal_cpu(cpu, MP_KDP, ASYNC);

        /*
         * Wait for other processors to synchronize
         */
        DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

        /*
         * This timeout is rather arbitrary; we don't want to NMI
         * processors that are executing at potentially
         * "unsafe-to-interrupt" points such as the trampolines,
         * but neither do we want to lose state by waiting too long.
         */
        tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);

        tsc_timeout = ~0ULL;

        while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
            /*
             * A TLB shootdown request may be pending--this would
             * result in the requesting processor waiting in
             * PMAP_UPDATE_TLBS() until this processor deals with it.
             * Process it, so it can now enter mp_kdp_wait()
             */
            handle_pending_TLB_flushes();

        /* If we've timed out, and some processor(s) are still unresponsive,
         * interrupt them with an NMI via the local APIC.
         */
        if (mp_kdp_ncpus != ncpus) {
            for (cpu = 0; cpu < real_ncpus; cpu++) {
                if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
                if (cpu_signal_pending(cpu, MP_KDP))
                    cpu_NMI_interrupt(cpu);

        for (cpu = 0; cpu < real_ncpus; cpu++) {
            if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
            cpu_NMI_interrupt(cpu);

    DBG("mp_kdp_enter() %d processors done %s\n",
        (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");

    postcode(MP_KDP_ENTER);

cpu_signal_pending(int cpu, mp_event_t event)
    volatile int    *signals = &cpu_datap(cpu)->cpu_signals;
    boolean_t       retval = FALSE;

    if (i_bit(event, signals))

long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
                         void *arg0, void *arg1)
    if (lcpu > (real_ncpus - 1))

    kdp_xcpu_call_func.func = func;
    kdp_xcpu_call_func.ret  = -1;
    kdp_xcpu_call_func.arg0 = arg0;
    kdp_xcpu_call_func.arg1 = arg1;
    kdp_xcpu_call_func.cpu  = lcpu;
    DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
    while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)

    return kdp_xcpu_call_func.ret;

kdp_x86_xcpu_poll(void)
    if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
        kdp_xcpu_call_func.ret =
            kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
                                    kdp_xcpu_call_func.arg1,
        kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
mp_kdp_wait(boolean_t flush, boolean_t isNMI)
    DBG("mp_kdp_wait()\n");
    /* If an I/O port has been specified as a debugging aid, issue a read */
    panic_io_port_read();
    current_cpu_datap()->debugger_ipi_time = mach_absolute_time();

    /* If we've trapped due to a machine-check, save MCA registers */

    atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
    while (mp_kdp_trap || (isNMI == TRUE)) {
        /*
         * A TLB shootdown request may be pending--this would result
         * in the requesting processor waiting in PMAP_UPDATE_TLBS()
         * until this processor handles it.
         * Process it, so it can now enter mp_kdp_wait()
         */
        handle_pending_TLB_flushes();

        kdp_x86_xcpu_poll();

    atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
    DBG("mp_kdp_wait() done\n");

    DBG("mp_kdp_exit()\n");

    atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
    debugger_exit_time = mach_absolute_time();

    mp_kdp_trap = FALSE;

    /* Wait for other processors to stop spinning. XXX needs timeout */
    DBG("mp_kdp_exit() waiting for processors to resume\n");
    while (mp_kdp_ncpus > 0) {
        /*
         * a TLB shootdown request may be pending... this would result in the requesting
         * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
         * Process it, so it can now enter mp_kdp_wait()
         */
        handle_pending_TLB_flushes();

    if (pmsafe_debug && !kdp_snapshot)
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);

    debugger_exit_time = mach_absolute_time();

    DBG("mp_kdp_exit() done\n");
    (void) ml_set_interrupts_enabled(mp_kdp_state);

#endif  /* MACH_KDP */
mp_recent_debugger_activity(void) {
    uint64_t abstime = mach_absolute_time();
    return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
            ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));

    __unused processor_t    processor)

    processor_t processor)
    int cpu = processor->cpu_id;

    if (cpu != cpu_number()) {
        i386_signal_cpu(cpu, MP_AST, ASYNC);
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);

slave_machine_init(void *param)
    /*
     * Here in process context, but with interrupts disabled.
     */
    DBG("slave_machine_init() CPU%d\n", get_cpu_number());

    if (param == FULL_SLAVE_INIT) {

    cpu_machine_init(); /* Interrupts enabled hereafter */

int cpu_number(void)
    return get_cpu_number();
    simple_lock_init(&cpu_warm_lock, 0);
    queue_init(&cpu_warm_call_list);
    for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
        enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);

grab_warm_timer_call()
    timer_call_t call = NULL;

    simple_lock(&cpu_warm_lock);
    if (!queue_empty(&cpu_warm_call_list)) {
        call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
    simple_unlock(&cpu_warm_lock);

free_warm_timer_call(timer_call_t call)
    simple_lock(&cpu_warm_lock);
    enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
    simple_unlock(&cpu_warm_lock);

/*
 * Runs in timer call context (interrupts disabled).
 */
cpu_warm_timer_call_func(
    call_entry_param_t p0,
    __unused call_entry_param_t p1)
    free_warm_timer_call((timer_call_t)p0);

/*
 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
 */
    cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;

    timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
    cwdp->cwd_result = 0;

/*
 * Not safe to call with interrupts disabled.
 */
ml_interrupt_prewarm(
    struct cpu_warm_data cwd;

    if (ml_get_interrupts_enabled() == FALSE) {
        panic("%s: Interrupts disabled?\n", __FUNCTION__);

    /*
     * If the platform doesn't need our help, say that we succeeded.
     */
    if (!ml_get_interrupt_prewake_applicable()) {
        return KERN_SUCCESS;

    /*
     * Grab a timer call to use.
     */
    call = grab_warm_timer_call();
        return KERN_RESOURCE_SHORTAGE;

    timer_call_setup(call, cpu_warm_timer_call_func, call);
    cwd.cwd_call = call;
    cwd.cwd_deadline = deadline;

    /*
     * For now, non-local interrupts happen on the master processor.
     */
    ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
        free_warm_timer_call(call);
        return KERN_FAILURE;

    return cwd.cwd_result;