/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kdp/kdp_internal.h>
#include <mach_ldebug.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/lock_group.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/timer_call.h>
#include <kern/zalloc.h>
#include <kern/queue.h>
#include <prng/random.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <i386/bit_routines.h>
#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/machine_cpu.h>
#include <i386/pmCPU.h>
#include <i386/machine_check.h>
#include <i386/acpi.h>

#include <sys/kdebug.h>

#include <console/serial_protos.h>

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */
#define PAUSE           delay(1000000)
#define DBG(x...)       kprintf(x)

/* Debugging/test trace events: */
#define TRACE_MP_TLB_FLUSH              MACHDBG_CODE(DBG_MACH_MP, 0)
#define TRACE_MP_CPUS_CALL              MACHDBG_CODE(DBG_MACH_MP, 1)
#define TRACE_MP_CPUS_CALL_LOCAL        MACHDBG_CODE(DBG_MACH_MP, 2)
#define TRACE_MP_CPUS_CALL_ACTION       MACHDBG_CODE(DBG_MACH_MP, 3)
#define TRACE_MP_CPUS_CALL_NOBUF        MACHDBG_CODE(DBG_MACH_MP, 4)
#define TRACE_MP_CPU_FAST_START         MACHDBG_CODE(DBG_MACH_MP, 5)
#define TRACE_MP_CPU_START              MACHDBG_CODE(DBG_MACH_MP, 6)
#define TRACE_MP_CPU_DEACTIVATE         MACHDBG_CODE(DBG_MACH_MP, 7)

#define ABS(v)          (((v) > 0)?(v):-(v))
void            slave_boot_init(void);
void            i386_cpu_IPI(int cpu);

#if MACH_KDP
static void     mp_kdp_wait(boolean_t flush, boolean_t isNMI);
#endif /* MACH_KDP */

#if MACH_KDP
static boolean_t        cpu_signal_pending(int cpu, mp_event_t event);
#endif /* MACH_KDP */
static int              NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t               smp_initialized = FALSE;
uint32_t                TSC_sync_margin = 0xFFF;
volatile boolean_t      force_immediate_debugger_NMI = FALSE;
volatile boolean_t      pmap_tlb_flush_timeout = FALSE;
#if DEBUG || DEVELOPMENT
boolean_t               mp_interrupt_watchdog_enabled = TRUE;
uint32_t                mp_interrupt_watchdog_events = 0;
#endif

SIMPLE_LOCK_DECLARE(debugger_callback_lock, 0);
struct debugger_callback *debugger_callback = NULL;

static LCK_GRP_DECLARE(smp_lck_grp, "i386_smp");
static LCK_MTX_EARLY_DECLARE(mp_cpu_boot_lock, &smp_lck_grp);

/* Variables needed for MP rendezvous. */
SIMPLE_LOCK_DECLARE(mp_rv_lock, 0);
static void     (*mp_rv_setup_func)(void *arg);
static void     (*mp_rv_action_func)(void *arg);
static void     (*mp_rv_teardown_func)(void *arg);
static void     *mp_rv_func_arg;
static volatile int     mp_rv_ncpus;
/* Cache-aligned barriers: */
static volatile long    mp_rv_entry    __attribute__((aligned(64)));
static volatile long    mp_rv_exit     __attribute__((aligned(64)));
static volatile long    mp_rv_complete __attribute__((aligned(64)));

volatile uint64_t       debugger_entry_time;
volatile uint64_t       debugger_exit_time;

extern int kdp_snapshot;
static struct _kdp_xcpu_call_func {
    kdp_x86_xcpu_func_t func;
    void                *arg0, *arg1;
    volatile long       ret;
    volatile uint16_t   cpu;
} kdp_xcpu_call_func = {
    .cpu = KDP_XCPU_NONE
};

/* Variables needed for MP broadcast. */
static void     (*mp_bc_action_func)(void *arg);
static void     *mp_bc_func_arg;
static int      mp_bc_ncpus;
static volatile long    mp_bc_count;
static LCK_MTX_EARLY_DECLARE(mp_bc_lock, &smp_lck_grp);
static volatile int     debugger_cpu = -1;
volatile long           NMIPI_acks = 0;
volatile long           NMI_count = 0;
static NMI_reason_t     NMI_panic_reason = NONE;
static int              vector_timed_out;

extern void     NMI_cpus(void);

static void     mp_cpus_call_init(void);
static void     mp_cpus_call_action(void);
static void     mp_call_PM(void);

char            mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
/* PAL-related routines */
boolean_t       i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
    int ipi_vector, i386_intr_func_t ipi_handler);
void            i386_start_cpu(int lapic_id, int cpu_num);
void            i386_send_NMI(int cpu);
void            NMIPI_enable(boolean_t);

#define NUM_CPU_WARM_CALLS      20
struct timer_call       cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
queue_head_t            cpu_warm_call_list;
decl_simple_lock_data(static, cpu_warm_lock);

typedef struct cpu_warm_data {
    timer_call_t    cwd_call;
    uint64_t        cwd_deadline;
    int             cwd_result;
} *cpu_warm_data_t;

static void             cpu_prewarm_init(void);
static void             cpu_warm_timer_call_func(timer_call_param_t p0, timer_call_param_t p1);
static void             _cpu_warm_setup(void *arg);
static timer_call_t     grab_warm_timer_call(void);
static void             free_warm_timer_call(timer_call_t call);
void
smp_init(void)
{
    if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
        LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler)) {
        return;
    }

    DBGLOG_CPU_INIT(master_cpu);

    mp_cpus_call_cpu_init(master_cpu);

#if DEBUG || DEVELOPMENT
    if (PE_parse_boot_argn("interrupt_watchdog",
        &mp_interrupt_watchdog_enabled,
        sizeof(mp_interrupt_watchdog_enabled))) {
        kprintf("Interrupt watchdog %sabled\n",
            mp_interrupt_watchdog_enabled ? "en" : "dis");
    }
#endif

    if (PE_parse_boot_argn("TSC_sync_margin",
        &TSC_sync_margin, sizeof(TSC_sync_margin))) {
        kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
    } else if (cpuid_vmm_present()) {
        kprintf("TSC sync margin disabled\n");
        TSC_sync_margin = 0;
    }
    smp_initialized = TRUE;

    cpu_prewarm_init();
}
typedef struct {
    int             starter_cpu;
    int             target_cpu;
    int             target_lapic;
} processor_start_info_t;
static processor_start_info_t   start_info __attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long            tsc_entry_barrier __attribute__((aligned(64)));
static volatile long            tsc_exit_barrier  __attribute__((aligned(64)));
static volatile uint64_t        tsc_target        __attribute__((aligned(64)));

/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
    while (iters-- > 0) {
        if (cpu_datap(slot_num)->cpu_running) {
            break;
        }
        delay(usecdelay);
    }
}

/*
 * Quickly bring a CPU back online which has been halted.
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
    kern_return_t   rc;

    /*
     * Try to perform a fast restart
     */
    rc = pmCPUExitHalt(slot_num);
    if (rc != KERN_SUCCESS) {
        /*
         * The CPU was not eligible for a fast restart.
         */
        return rc;
    }

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
        slot_num, 0, 0, 0, 0);

    /*
     * Wait until the CPU is back online.
     */
    mp_disable_preemption();

    /*
     * We use short pauses (1us) for low latency. 30,000 iterations is
     * longer than a full restart would require so it should be more
     * than long enough.
     */
    mp_wait_for_cpu_up(slot_num, 30000, 1);
    mp_enable_preemption();

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
        slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);

    /*
     * Check to make sure that the CPU is really running. If not,
     * go through the slow path.
     */
    if (cpu_datap(slot_num)->cpu_running) {
        return KERN_SUCCESS;
    } else {
        return KERN_FAILURE;
    }
}

void
started_cpu(void)
{
    /* Here on the started cpu with cpu_running set TRUE */

    if (TSC_sync_margin &&
        start_info.target_cpu == cpu_number()) {
        /*
         * I've just started-up, synchronize again with the starter cpu
         * and then snap my TSC.
         */
        atomic_decl(&tsc_entry_barrier, 1);
        while (tsc_entry_barrier != 0) {
            ;   /* spin for starter and target at barrier */
        }
        tsc_target = rdtsc64();
        atomic_decl(&tsc_exit_barrier, 1);
    }
}

static void
start_cpu(void *arg)
{
    int                     i = 1000;
    processor_start_info_t  *psip = (processor_start_info_t *) arg;

    /* Ignore this if the current processor is not the starter */
    if (cpu_number() != psip->starter_cpu) {
        return;
    }

    DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
        arg, psip->target_cpu, psip->target_lapic);

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_START | DBG_FUNC_START,
        psip->target_cpu,
        psip->target_lapic, 0, 0, 0);

    i386_start_cpu(psip->target_lapic, psip->target_cpu);

#ifdef POSTCODE_DELAY
    /* Wait much longer if postcodes are displayed for a delay period. */
    i *= 10000;
#endif
    DBG("start_cpu(%p) about to wait for cpu %d\n",
        arg, psip->target_cpu);

    mp_wait_for_cpu_up(psip->target_cpu, i * 100, 100);

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_START | DBG_FUNC_END,
        psip->target_cpu,
        cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);

    if (TSC_sync_margin &&
        cpu_datap(psip->target_cpu)->cpu_running) {
        /*
         * Compare the TSC from the started processor with ours.
         * Report and log/panic if it diverges by more than
         * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
         * can be overridden by boot-arg (with 0 meaning no checking).
         */
        uint64_t        tsc_starter;
        int64_t         tsc_delta;

        atomic_decl(&tsc_entry_barrier, 1);
        while (tsc_entry_barrier != 0) {
            ;   /* spin for both processors at barrier */
        }
        tsc_starter = rdtsc64();
        atomic_decl(&tsc_exit_barrier, 1);
        while (tsc_exit_barrier != 0) {
            ;   /* spin for target to store its TSC */
        }
        tsc_delta = tsc_target - tsc_starter;
        kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
            psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
#if DEBUG || DEVELOPMENT
        /*
         * Stash the delta for inspection later, since we can no
         * longer print/log it with interrupts disabled.
         */
        cpu_datap(psip->target_cpu)->tsc_sync_delta = tsc_delta;
#endif
        if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
            panic(
#else
            printf(
#endif
                "Unsynchronized TSC for cpu %d: "
                "0x%016llx, delta 0x%llx\n",
                psip->target_cpu, tsc_target, tsc_delta);
        }
    }
}

kern_return_t
intel_startCPU(int slot_num)
{
    boolean_t       istate;
    int             lapic = cpu_to_lapic[slot_num];

    DBGLOG_CPU_INIT(slot_num);

    DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
    DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);

    /*
     * Initialize (or re-initialize) the descriptor tables for this cpu.
     * Propagate processor mode to slave.
     */
    cpu_desc_init(cpu_datap(slot_num));

    /* Serialize use of the slave boot stack, etc. */
    lck_mtx_lock(&mp_cpu_boot_lock);

    istate = ml_set_interrupts_enabled(FALSE);
    if (slot_num == get_cpu_number()) {
        ml_set_interrupts_enabled(istate);
        lck_mtx_unlock(&mp_cpu_boot_lock);
        return KERN_SUCCESS;
    }

    start_info.starter_cpu  = cpu_number();
    start_info.target_cpu   = slot_num;
    start_info.target_lapic = lapic;
    tsc_entry_barrier = 2;
    tsc_exit_barrier = 2;

    /*
     * Perform the processor startup sequence with all running
     * processors rendezvous'ed. This is required during periods when
     * the cache-disable bit is set for MTRR/PAT initialization.
     */
    mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

    start_info.target_cpu = 0;

    ml_set_interrupts_enabled(istate);
    lck_mtx_unlock(&mp_cpu_boot_lock);

    if (!cpu_datap(slot_num)->cpu_running) {
        kprintf("Failed to start CPU %02d\n", slot_num);
        printf("Failed to start CPU %02d, rebooting...\n", slot_num);
        delay(1000000);
        halt_cpu();
        return KERN_SUCCESS;
    } else {
        kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
        return KERN_SUCCESS;
    }
}
#if MP_DEBUG
cpu_signal_event_log_t  *cpu_signal[MAX_CPUS];
cpu_signal_event_log_t  *cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif  /* MP_DEBUG */

/*
 * Note: called with NULL state when polling for TLB flush and cross-calls.
 */
int
cpu_signal_handler(x86_saved_state_t *regs)
{
#if !MACH_KDP
#pragma unused (regs)
#endif /* !MACH_KDP */
    int             my_cpu;
    volatile int    *my_word;

    SCHED_STATS_INC(ipi_count);

    my_cpu = cpu_number();
    my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
    /* Store the initial set of signals for diagnostics. New
     * signals could arrive while these are being processed
     * so it's no more than a hint.
     */

    cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;

    do {
#if MACH_KDP
        if (i_bit(MP_KDP, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_KDP);
            i_bit_clear(MP_KDP, my_word);
            /* Ensure that the i386_kernel_state at the base of the
             * current thread's stack (if any) is synchronized with the
             * context at the moment of the interrupt, to facilitate
             * access through the debugger.
             */
            sync_iss_to_iks(regs);
            if (pmsafe_debug && !kdp_snapshot) {
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
            }
            mp_kdp_wait(TRUE, FALSE);
            if (pmsafe_debug && !kdp_snapshot) {
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
            }
        } else
#endif /* MACH_KDP */
        if (i_bit(MP_TLB_FLUSH, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_TLB_FLUSH);
            i_bit_clear(MP_TLB_FLUSH, my_word);
            pmap_update_interrupt();
        } else if (i_bit(MP_CALL, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_CALL);
            i_bit_clear(MP_CALL, my_word);
            mp_cpus_call_action();
        } else if (i_bit(MP_CALL_PM, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_CALL_PM);
            i_bit_clear(MP_CALL_PM, my_word);
            mp_call_PM();
        }
        if (regs == NULL) {
            /* Called to poll only for cross-calls and TLB flush */
            break;
        } else if (i_bit(MP_AST, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_AST);
            i_bit_clear(MP_AST, my_word);
            ast_check(cpu_to_processor(my_cpu));
        }
    } while (*my_word);

    return 0;
}
extern void kprintf_break_lock(void);

static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
    void            *stackptr;
    char            pstr[256];
    uint64_t        now = mach_absolute_time();

    if (panic_active() && !panicDebugging) {
        if (pmsafe_debug) {
            pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
        }
        for (;;) {
            cpu_pause();
        }
    }

    atomic_incl(&NMIPI_acks, 1);
    atomic_incl(&NMI_count, 1);
    sync_iss_to_iks_unconditionally(regs);
    __asm__ volatile ("movq %%rbp, %0" : "=m" (stackptr));

    if (cpu_number() == debugger_cpu) {
        goto NMExit;
    }

    if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
            cpu_number(), now, spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    } else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
            cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
        panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
    } else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
            cpu_number(), now);
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    } else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
            cpu_number(), now, vector_timed_out);
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    }

    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
    }
    current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
    i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
    if (panic_active() || NMI_panic_reason != NONE) {
        mp_kdp_wait(FALSE, TRUE);
    } else if (!mp_kdp_trap &&
        !mp_kdp_is_NMI &&
        virtualized && (debug_boot_arg & DB_NMI)) {
        /*
         * Under a VMM with the debug boot-arg set, drop into kdp.
         * Since an NMI is involved, there's a risk of contending with
         * a panic. And side-effects of NMIs may result in entry into,
         * and continuing from, the debugger being unreliable.
         */
        if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
            kprintf_break_lock();
            kprintf("Debugger entry requested by NMI\n");
            kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
            printf("Debugger entry requested by NMI\n");
            mp_kdp_is_NMI = FALSE;
        } else {
            mp_kdp_wait(FALSE, FALSE);
        }
    } else {
        mp_kdp_wait(FALSE, FALSE);
    }
    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
    }

NMExit:
    return 1;
}
/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI. If an
 * IPI is always needed then use i386_cpu_IPI.
 */
void
cpu_interrupt(int cpu)
{
    boolean_t did_IPI = FALSE;

    if (smp_initialized
        && pmCPUExitIdle(cpu_datap(cpu))) {
        i386_cpu_IPI(cpu);
        did_IPI = TRUE;
    }

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
}

/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
void
cpu_NMI_interrupt(int cpu)
{
    if (smp_initialized) {
        i386_send_NMI(cpu);
    }
}

void
NMI_cpus(void)
{
    unsigned int    cpu;
    boolean_t       intrs_enabled;
    uint64_t        tsc_timeout;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    for (cpu = 0; cpu < real_ncpus; cpu++) {
        if (!cpu_is_running(cpu)) {
            continue;
        }
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
        cpu_NMI_interrupt(cpu);
        tsc_timeout = !machine_timeout_suspended() ?
            rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
            ~0ULL;
        while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
            handle_pending_TLB_flushes();
            if (rdtsc64() > tsc_timeout) {
                panic("NMI_cpus() timeout cpu %d", cpu);
            }
        }
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
    }
    ml_set_interrupts_enabled(intrs_enabled);
}

static void(*volatile mp_PM_func)(void) = NULL;

static void
mp_call_PM(void)
{
    assert(!ml_get_interrupts_enabled());

    if (mp_PM_func != NULL) {
        mp_PM_func();
    }
}

void
cpu_PM_interrupt(int cpu)
{
    assert(!ml_get_interrupts_enabled());

    if (mp_PM_func != NULL) {
        if (cpu == cpu_number()) {
            mp_call_PM();
        } else {
            i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
        }
    }
}

void
PM_interrupt_register(void (*fn)(void))
{
    mp_PM_func = fn;
}

void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
    volatile int    *signals = &cpu_datap(cpu)->cpu_signals;
    uint64_t        tsc_timeout;

    if (!cpu_datap(cpu)->cpu_running) {
        return;
    }

    if (event == MP_TLB_FLUSH) {
        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
    }

    DBGLOG(cpu_signal, cpu, event);

    i_bit_set(event, signals);
    i386_cpu_IPI(cpu);
    if (mode == SYNC) {
again:
        tsc_timeout = !machine_timeout_suspended() ?
            rdtsc64() + (1000 * 1000 * 1000) :
            ~0ULL;
        while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
            cpu_pause();
        }
        if (i_bit(event, signals)) {
            DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
                cpu, event);
            goto again;
        }
    }
    if (event == MP_TLB_FLUSH) {
        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
    }
}

/*
 * Helper function called when busy-waiting: panic if too long
 * a TSC-based time has elapsed since the start of the spin.
 */
static boolean_t
mp_spin_timeout(uint64_t tsc_start)
{
    uint64_t        tsc_timeout;

    if (machine_timeout_suspended()) {
        return FALSE;
    }

    /*
     * The timeout is 4 * the spinlock timeout period
     * unless we have serial console printing (kprintf) enabled
     * in which case we allow an even greater margin.
     */
    tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
        : LockTimeOutTSC << 4;
    return rdtsc64() > tsc_start + tsc_timeout;
}
/*
 * Helper function to take a spinlock while ensuring that incoming IPIs
 * are still serviced if interrupts are masked while we spin.
 * Returns current interrupt state.
 */
boolean_t
mp_safe_spin_lock(usimple_lock_t lock)
{
    if (ml_get_interrupts_enabled()) {
        simple_lock(lock, LCK_GRP_NULL);
        return TRUE;
    } else {
        uint64_t tsc_spin_start = rdtsc64();
        while (!simple_lock_try(lock, LCK_GRP_NULL)) {
            cpu_signal_handler(NULL);
            if (mp_spin_timeout(tsc_spin_start)) {
                uint32_t lock_cpu;
                uintptr_t lowner = (uintptr_t)
                    lock->interlock.lock_data;
                spinlock_timed_out = lock;
                lock_cpu = spinlock_timeout_NMI(lowner);
                NMIPI_panic(cpu_to_cpumask(lock_cpu), SPINLOCK_TIMEOUT);
                panic("mp_safe_spin_lock() timed out, lock: %p, owner thread: 0x%lx, current_thread: %p, owner on CPU 0x%x, time: %llu",
                    lock, lowner, current_thread(), lock_cpu, mach_absolute_time());
            }
        }
        return FALSE;
    }
}
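/*
 * Illustrative sketch (not part of the original source): the caller pattern
 * used elsewhere in this file, e.g. by mp_cpus_kick() -- interrupts are
 * masked first, the topo lock is taken via mp_safe_spin_lock() so that
 * pending IPIs are still polled while spinning, and the previous interrupt
 * state is restored after the unlock.
 */
#if 0   /* example only */
static void
example_topo_lock_usage(void)
{
    boolean_t intrs_enabled;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    mp_safe_spin_lock(&x86_topo_lock);

    /* ... walk per-cpu state under the topo lock ... */

    simple_unlock(&x86_topo_lock);
    ml_set_interrupts_enabled(intrs_enabled);
}
#endif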
/*
 * All-CPU rendezvous:
 *      - CPUs are signalled,
 *      - all execute the setup function (if specified),
 *      - rendezvous (i.e. all cpus reach a barrier),
 *      - all execute the action function (if specified),
 *      - rendezvous again,
 *      - execute the teardown function (if specified), and then
 *      - resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
static void
mp_rendezvous_action(__unused void *null)
{
    boolean_t       intrs_enabled;
    uint64_t        tsc_spin_start;

    /*
     * Note that mp_rv_lock was acquired by the thread that initiated the
     * rendezvous and must have been acquired before we enter
     * mp_rendezvous_action().
     */
    current_cpu_datap()->cpu_rendezvous_in_progress = TRUE;

    /* setup function */
    if (mp_rv_setup_func != NULL) {
        mp_rv_setup_func(mp_rv_func_arg);
    }

    intrs_enabled = ml_get_interrupts_enabled();

    /* spin on entry rendezvous */
    atomic_incl(&mp_rv_entry, 1);
    tsc_spin_start = rdtsc64();

    while (mp_rv_entry < mp_rv_ncpus) {
        /* poll for pesky tlb flushes if interrupts disabled */
        if (!intrs_enabled) {
            handle_pending_TLB_flushes();
        }
        if (mp_spin_timeout(tsc_spin_start)) {
            panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
        }
    }

    /* action function */
    if (mp_rv_action_func != NULL) {
        mp_rv_action_func(mp_rv_func_arg);
    }

    /* spin on exit rendezvous */
    atomic_incl(&mp_rv_exit, 1);
    tsc_spin_start = rdtsc64();
    while (mp_rv_exit < mp_rv_ncpus) {
        if (!intrs_enabled) {
            handle_pending_TLB_flushes();
        }
        if (mp_spin_timeout(tsc_spin_start)) {
            panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
        }
    }

    /* teardown function */
    if (mp_rv_teardown_func != NULL) {
        mp_rv_teardown_func(mp_rv_func_arg);
    }

    current_cpu_datap()->cpu_rendezvous_in_progress = FALSE;

    /* Bump completion count */
    atomic_incl(&mp_rv_complete, 1);
}

void
mp_rendezvous(void (*setup_func)(void *),
    void (*action_func)(void *),
    void (*teardown_func)(void *),
    void *arg)
{
    uint64_t        tsc_spin_start;

    if (!smp_initialized) {
        if (setup_func != NULL) {
            setup_func(arg);
        }
        if (action_func != NULL) {
            action_func(arg);
        }
        if (teardown_func != NULL) {
            teardown_func(arg);
        }
        return;
    }

    /* obtain rendezvous lock */
    mp_rendezvous_lock();

    /* set static function pointers */
    mp_rv_setup_func = setup_func;
    mp_rv_action_func = action_func;
    mp_rv_teardown_func = teardown_func;
    mp_rv_func_arg = arg;

    mp_rv_entry    = 0;
    mp_rv_exit     = 0;
    mp_rv_complete = 0;

    /*
     * signal other processors, which will call mp_rendezvous_action()
     * with interrupts disabled
     */
    mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;

    /* call executor function on this cpu */
    mp_rendezvous_action(NULL);

    /*
     * Spin for everyone to complete.
     * This is necessary to ensure that all processors have proceeded
     * from the exit barrier before we release the rendezvous structure.
     */
    tsc_spin_start = rdtsc64();
    while (mp_rv_complete < mp_rv_ncpus) {
        if (mp_spin_timeout(tsc_spin_start)) {
            panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
        }
    }

    mp_rv_setup_func = NULL;
    mp_rv_action_func = NULL;
    mp_rv_teardown_func = NULL;
    mp_rv_func_arg = NULL;

    mp_rendezvous_unlock();
}

void
mp_rendezvous_lock(void)
{
    (void) mp_safe_spin_lock(&mp_rv_lock);
}

void
mp_rendezvous_unlock(void)
{
    simple_unlock(&mp_rv_lock);
}

void
mp_rendezvous_break_lock(void)
{
    simple_lock_init(&mp_rv_lock, 0);
}

static void
setup_disable_intrs(__unused void * param_not_used)
{
    /* disable interrupts before the first barrier */
    boolean_t intr = ml_set_interrupts_enabled(FALSE);

    current_cpu_datap()->cpu_iflag = intr;
    DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

static void
teardown_restore_intrs(__unused void * param_not_used)
{
    /* restore interrupt flag following MTRR changes */
    ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
    DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
void
mp_rendezvous_no_intrs(
    void (*action_func)(void *),
    void *arg)
{
    mp_rendezvous(setup_disable_intrs,
        action_func,
        teardown_restore_intrs,
        arg);
}
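/*
 * Illustrative sketch (not part of the original source): how a caller such as
 * intel_startCPU() above, or a kext, drives mp_rendezvous_no_intrs().  Every
 * running CPU executes the action with interrupts disabled, so the action
 * must be reentrant and must not block.  The action and counter below are
 * hypothetical.
 */
#if 0   /* example only */
static volatile long example_rv_counter;

static void
example_rendezvous_action(void *arg)
{
    /* Runs once on each running CPU, interrupts disabled. */
    atomic_incl((volatile long *) arg, 1);
}

static void
example_rendezvous_caller(void)
{
    example_rv_counter = 0;
    mp_rendezvous_no_intrs(example_rendezvous_action, (void *) &example_rv_counter);
    /* On return, example_rv_counter equals the number of running CPUs. */
}
#endif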
typedef struct {
    queue_chain_t   link;                       /* queue linkage */
    void            (*func)(void *, void *);    /* routine to call */
    void            *arg0;                      /* routine's 1st arg */
    void            *arg1;                      /* routine's 2nd arg */
    cpumask_t       *maskp;                     /* completion response mask */
} mp_call_t;

typedef struct {
    queue_head_t            queue;
    decl_simple_lock_data(, lock);
} mp_call_queue_t;
#define MP_CPUS_CALL_BUFS_PER_CPU       MAX_CPUS
static mp_call_queue_t  mp_cpus_call_freelist;
static mp_call_queue_t  mp_cpus_call_head[MAX_CPUS];

static inline boolean_t
mp_call_head_lock(mp_call_queue_t *cqp)
{
    boolean_t       intrs_enabled;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    simple_lock(&cqp->lock, LCK_GRP_NULL);

    return intrs_enabled;
}

/*
 * Deliver an NMIPI to a set of processors to cause them to panic.
 */
void
NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why)
{
    unsigned int    cpu;
    cpumask_t       cpu_bit;
    uint64_t        deadline;

    NMI_panic_reason = why;

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if ((cpu_mask & cpu_bit) == 0) {
            continue;
        }
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
        cpu_NMI_interrupt(cpu);
    }

    /* Wait (only so long) for NMI'ed cpus to respond */
    deadline = mach_absolute_time() + LockTimeOut;
    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if ((cpu_mask & cpu_bit) == 0) {
            continue;
        }
        while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
            mach_absolute_time() < deadline) {
            cpu_pause();
        }
    }
}

static inline boolean_t
mp_call_head_is_locked(mp_call_queue_t *cqp)
{
    return !ml_get_interrupts_enabled() &&
           hw_lock_held((hw_lock_t)&cqp->lock);
}

static inline void
mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
{
    simple_unlock(&cqp->lock);
    ml_set_interrupts_enabled(intrs_enabled);
}

static inline mp_call_t *
mp_call_alloc(void)
{
    mp_call_t       *callp = NULL;
    boolean_t       intrs_enabled;
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    intrs_enabled = mp_call_head_lock(cqp);
    if (!queue_empty(&cqp->queue)) {
        queue_remove_first(&cqp->queue, callp, typeof(callp), link);
    }
    mp_call_head_unlock(cqp, intrs_enabled);

    return callp;
}

static inline void
mp_call_free(mp_call_t *callp)
{
    boolean_t       intrs_enabled;
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    intrs_enabled = mp_call_head_lock(cqp);
    queue_enter_first(&cqp->queue, callp, typeof(callp), link);
    mp_call_head_unlock(cqp, intrs_enabled);
}

static inline mp_call_t *
mp_call_dequeue_locked(mp_call_queue_t *cqp)
{
    mp_call_t       *callp = NULL;

    assert(mp_call_head_is_locked(cqp));
    if (!queue_empty(&cqp->queue)) {
        queue_remove_first(&cqp->queue, callp, typeof(callp), link);
    }
    return callp;
}

static inline void
mp_call_enqueue_locked(
    mp_call_queue_t *cqp,
    mp_call_t       *callp)
{
    queue_enter(&cqp->queue, callp, typeof(callp), link);
}

/* Called on the boot processor to initialize global structures */
static void
mp_cpus_call_init(void)
{
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    DBG("mp_cpus_call_init()\n");
    simple_lock_init(&cqp->lock, 0);
    queue_init(&cqp->queue);
}

/*
 * Called at processor registration to add call buffers to the free list
 * and to initialize the per-cpu call queue.
 */
void
mp_cpus_call_cpu_init(int cpu)
{
    int             i;
    mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
    mp_call_t       *callp;

    simple_lock_init(&cqp->lock, 0);
    queue_init(&cqp->queue);
    for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
        callp = zalloc_permanent_type(mp_call_t);
        mp_call_free(callp);
    }

    DBG("mp_cpus_call_init(%d) done\n", cpu);
}

/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
 */
static void
mp_cpus_call_action(void)
{
    mp_call_queue_t *cqp;
    boolean_t       intrs_enabled;
    mp_call_t       *callp;
    mp_call_t       call;

    assert(!ml_get_interrupts_enabled());
    cqp = &mp_cpus_call_head[cpu_number()];
    intrs_enabled = mp_call_head_lock(cqp);
    while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
        /* Copy call request to the stack to free buffer */
        call = *callp;
        mp_call_free(callp);
        if (call.func != NULL) {
            mp_call_head_unlock(cqp, intrs_enabled);
            KERNEL_DEBUG_CONSTANT(
                TRACE_MP_CPUS_CALL_ACTION,
                VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
                VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
            call.func(call.arg0, call.arg1);
            (void) mp_call_head_lock(cqp);
        }
        if (call.maskp != NULL) {
            i_bit_set(cpu_number(), call.maskp);
        }
    }
    mp_call_head_unlock(cqp, intrs_enabled);
}

/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * Possible modes are:
 *  SYNC:   function is called serially on target cpus in logical cpu order
 *          waiting for each call to be acknowledged before proceeding
 *  ASYNC:  function call is queued to the specified cpus
 *          waiting for all calls to complete in parallel before returning
 *  NOSYNC: function calls are queued
 *          but we return before confirmation of calls completing.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * The return value is the number of cpus on which the call was made or queued.
 */
cpu_t
mp_cpus_call(
    cpumask_t       cpus,
    mp_sync_t       mode,
    void            (*action_func)(void *),
    void            *arg)
{
    return mp_cpus_call1(
        cpus,
        mode,
        (void (*)(void *, void *))action_func,
        arg,
        NULL,
        NULL);
}
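/*
 * Illustrative sketch (not part of the original source): typical uses of the
 * mp_cpus_call() wrapper, following the mode semantics documented above.  The
 * handler below is hypothetical; queued calls run on the remote cpus from the
 * MP_CALL signal handler with interrupts disabled, so the handler must be
 * interrupt-safe and must not block.
 */
#if 0   /* example only */
static void
example_cross_call_handler(__unused void *arg)
{
    /* Runs on each target cpu; must be safe in interrupt context. */
}

static void
example_mp_cpus_call_usage(void)
{
    cpu_t n;

    /* SYNC: call serially on every other cpu, waiting for each ack. */
    n = mp_cpus_call(CPUMASK_OTHERS, SYNC, example_cross_call_handler, NULL);

    /* ASYNC: queue to the master cpu, wait for completion before returning. */
    n = mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, example_cross_call_handler, NULL);

    (void) n;   /* number of cpus on which the call was made or queued */
}
#endif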
static void
mp_cpus_call_wait(boolean_t  intrs_enabled,
    cpumask_t  cpus_called,
    cpumask_t  *cpus_responded)
{
    mp_call_queue_t *cqp;
    uint64_t        tsc_spin_start;

    assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
    cqp = &mp_cpus_call_head[cpu_number()];

    tsc_spin_start = rdtsc64();
    while (*cpus_responded != cpus_called) {
        if (!intrs_enabled) {
            /* Sniffing w/o locking */
            if (!queue_empty(&cqp->queue)) {
                mp_cpus_call_action();
            }
            cpu_signal_handler(NULL);
        }
        if (mp_spin_timeout(tsc_spin_start)) {
            cpumask_t       cpus_unresponsive;

            cpus_unresponsive = cpus_called & ~(*cpus_responded);
            NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
            panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
                cpus_unresponsive);
        }
    }
}

cpu_t
mp_cpus_call1(
    cpumask_t       cpus,
    mp_sync_t       mode,
    void            (*action_func)(void *, void *),
    void            *arg0,
    void            *arg1,
    cpumask_t       *cpus_calledp)
{
    cpu_t           cpu = 0;
    boolean_t       intrs_enabled = FALSE;
    boolean_t       call_self = FALSE;
    cpumask_t       cpus_called = 0;
    cpumask_t       cpus_responded = 0;
    long            cpus_call_count = 0;
    uint64_t        tsc_spin_start;
    boolean_t       topo_lock;

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPUS_CALL | DBG_FUNC_START,
        cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));

    if (!smp_initialized) {
        if ((cpus & CPUMASK_SELF) == 0) {
            goto out;
        }
        if (action_func != NULL) {
            intrs_enabled = ml_set_interrupts_enabled(FALSE);
            action_func(arg0, arg1);
            ml_set_interrupts_enabled(intrs_enabled);
        }
        call_self = TRUE;
        goto out;
    }

    /*
     * Queue the call for each non-local requested cpu.
     * This is performed under the topo lock to prevent changes to
     * cpus online state and to prevent concurrent rendezvouses --
     * although an exception is made if we're calling only the master
     * processor since that always remains active. Note: this exception
     * is expected for longterm timer nosync cross-calls to the master cpu.
     */
    mp_disable_preemption();
    intrs_enabled = ml_get_interrupts_enabled();
    topo_lock = (cpus != cpu_to_cpumask(master_cpu));
    if (topo_lock) {
        ml_set_interrupts_enabled(FALSE);
        (void) mp_safe_spin_lock(&x86_topo_lock);
    }
    for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
        if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
            !cpu_is_running(cpu)) {
            continue;
        }
        tsc_spin_start = rdtsc64();
        if (cpu == (cpu_t) cpu_number()) {
            /*
             * We don't IPI ourself and if calling asynchronously,
             * we defer our call until we have signalled all others.
             */
            call_self = TRUE;
            if (mode == SYNC && action_func != NULL) {
                KERNEL_DEBUG_CONSTANT(
                    TRACE_MP_CPUS_CALL_LOCAL,
                    VM_KERNEL_UNSLIDE(action_func),
                    VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
                action_func(arg0, arg1);
            }
        } else {
            /*
             * Here to queue a call to cpu and IPI.
             */
            mp_call_t       *callp = NULL;
            mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
            boolean_t       intrs_inner;

queue_call:
            if (callp == NULL) {
                callp = mp_call_alloc();
            }
            intrs_inner = mp_call_head_lock(cqp);
            if (callp == NULL) {
                mp_call_head_unlock(cqp, intrs_inner);
                KERNEL_DEBUG_CONSTANT(
                    TRACE_MP_CPUS_CALL_NOBUF,
                    cpu, 0, 0, 0, 0);
                if (!intrs_inner) {
                    /* Sniffing w/o locking */
                    if (!queue_empty(&cqp->queue)) {
                        mp_cpus_call_action();
                    }
                    handle_pending_TLB_flushes();
                }
                if (mp_spin_timeout(tsc_spin_start)) {
                    panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
                        tsc_spin_start, rdtsc64());
                }
                goto queue_call;
            }
            callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
            callp->func = action_func;
            callp->arg0 = arg0;
            callp->arg1 = arg1;
            mp_call_enqueue_locked(cqp, callp);
            cpus_call_count++;
            cpus_called |= cpu_to_cpumask(cpu);
            i386_signal_cpu(cpu, MP_CALL, ASYNC);
            mp_call_head_unlock(cqp, intrs_inner);
            if (mode == SYNC) {
                mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
            }
        }
    }
    if (topo_lock) {
        simple_unlock(&x86_topo_lock);
        ml_set_interrupts_enabled(intrs_enabled);
    }

    /* Call locally if mode not SYNC */
    if (mode != SYNC && call_self) {
        KERNEL_DEBUG_CONSTANT(
            TRACE_MP_CPUS_CALL_LOCAL,
            VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
        if (action_func != NULL) {
            ml_set_interrupts_enabled(FALSE);
            action_func(arg0, arg1);
            ml_set_interrupts_enabled(intrs_enabled);
        }
    }

    /* For ASYNC, now wait for all signaled cpus to complete their calls */
    if (mode == ASYNC) {
        mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
    }

    /* Safe to allow pre-emption now */
    mp_enable_preemption();

out:
    if (call_self) {
        cpus_called |= cpu_to_cpumask(cpu);
        cpus_call_count++;
    }

    if (cpus_calledp) {
        *cpus_calledp = cpus_called;
    }

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPUS_CALL | DBG_FUNC_END,
        cpus_call_count, cpus_called, 0, 0, 0);

    return (cpu_t) cpus_call_count;
}

static void
mp_broadcast_action(__unused void *null)
{
    /* call action function */
    if (mp_bc_action_func != NULL) {
        mp_bc_action_func(mp_bc_func_arg);
    }

    /* if we're the last one through, wake up the instigator */
    if (atomic_decl_and_test(&mp_bc_count, 1)) {
        thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
    }
}

/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
void
mp_broadcast(
    void (*action_func)(void *),
    void *arg)
{
    if (!smp_initialized) {
        if (action_func != NULL) {
            action_func(arg);
        }
        return;
    }

    /* obtain broadcast lock */
    lck_mtx_lock(&mp_bc_lock);

    /* set static function pointers */
    mp_bc_action_func = action_func;
    mp_bc_func_arg = arg;

    assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

    /*
     * signal other processors, which will call mp_broadcast_action()
     */
    mp_bc_count = real_ncpus;                               /* assume max possible active */
    mp_bc_ncpus = mp_cpus_call(CPUMASK_ALL, NOSYNC, *mp_broadcast_action, NULL);
    atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus);    /* subtract inactive */

    /* block for other cpus to have run action_func */
    if (mp_bc_ncpus > 1) {
        thread_block(THREAD_CONTINUE_NULL);
    } else {
        clear_wait(current_thread(), THREAD_AWAKENED);
    }

    /* release lock */
    lck_mtx_unlock(&mp_bc_lock);
}
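/*
 * Illustrative sketch (not part of the original source): mp_broadcast() is a
 * blocking convenience built on mp_cpus_call(); the caller sleeps until the
 * handler has run on every active cpu.  The handler and counter below are
 * hypothetical.
 */
#if 0   /* example only */
static volatile long example_bc_visits;

static void
example_broadcast_handler(__unused void *arg)
{
    /* Runs once on every active cpu. */
    atomic_incl(&example_bc_visits, 1);
}

static void
example_broadcast_caller(void)
{
    example_bc_visits = 0;
    mp_broadcast(example_broadcast_handler, NULL);
    /* All active cpus have run the handler by the time we get here. */
}
#endif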
void
mp_cpus_kick(cpumask_t cpus)
{
    cpu_t           cpu;
    boolean_t       intrs_enabled = FALSE;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    mp_safe_spin_lock(&x86_topo_lock);

    for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
        if (((cpu_to_cpumask(cpu) & cpus) == 0)
            || !cpu_is_running(cpu)) {
            continue;
        }

        lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
    }

    simple_unlock(&x86_topo_lock);
    ml_set_interrupts_enabled(intrs_enabled);
}

void
i386_activate_cpu(void)
{
    cpu_data_t      *cdp = current_cpu_datap();

    assert(!ml_get_interrupts_enabled());

    if (!smp_initialized) {
        cdp->cpu_running = TRUE;
        return;
    }

    mp_safe_spin_lock(&x86_topo_lock);
    cdp->cpu_running = TRUE;
    pmap_tlbi_range(0, ~0ULL, true, 0);
    simple_unlock(&x86_topo_lock);
}

void
i386_deactivate_cpu(void)
{
    cpu_data_t      *cdp = current_cpu_datap();

    assert(!ml_get_interrupts_enabled());

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
        0, 0, 0, 0, 0);

    mp_safe_spin_lock(&x86_topo_lock);
    cdp->cpu_running = FALSE;
    simple_unlock(&x86_topo_lock);

    /*
     * Move all of this cpu's timers to the master/boot cpu,
     * and poke it in case there's a sooner deadline for it to schedule.
     */
    timer_queue_shutdown(&cdp->rtclock_timer.queue);
    mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);

#if MONOTONIC
    mt_cpu_down(cdp);
#endif /* MONOTONIC */

    /*
     * Open an interrupt window
     * and ensure any pending IPI or timer is serviced
     */
    mp_disable_preemption();
    ml_set_interrupts_enabled(TRUE);

    while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime) {
        cpu_pause();
    }
    /*
     * Ensure there's no remaining timer deadline set
     * - AICPM may have left one active.
     */
    setPop(0);

    ml_set_interrupts_enabled(FALSE);
    mp_enable_preemption();

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
        0, 0, 0, 0, 0);
}

int pmsafe_debug = 1;

#if MACH_KDP
volatile boolean_t      mp_kdp_trap = FALSE;
volatile boolean_t      mp_kdp_is_NMI = FALSE;
volatile unsigned long  mp_kdp_ncpus;
boolean_t               mp_kdp_state;
void
mp_kdp_enter(boolean_t proceed_on_failure)
{
    unsigned int    cpu;
    unsigned int    ncpus = 0;
    unsigned int    my_cpu;
    uint64_t        tsc_timeout;

    DBG("mp_kdp_enter()\n");

    /*
     * Here to enter the debugger.
     * In case of races, only one cpu is allowed to enter kdp after
     * stopping others.
     */
    mp_kdp_state = ml_set_interrupts_enabled(FALSE);
    my_cpu = cpu_number();

    if (my_cpu == (unsigned) debugger_cpu) {
        kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
        return;
    }

    uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
    int locked = 0;
    while (!locked || mp_kdp_trap) {
        if (locked) {
            simple_unlock(&x86_topo_lock);
        }
        if (proceed_on_failure) {
            if (mach_absolute_time() - start_time > 500000000ll) {
                paniclog_append_noflush("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
                break;
            }
            locked = simple_lock_try(&x86_topo_lock, LCK_GRP_NULL);
            if (!locked) {
                cpu_pause();
            }
        } else {
            mp_safe_spin_lock(&x86_topo_lock);
            locked = TRUE;
        }

        if (locked && mp_kdp_trap) {
            simple_unlock(&x86_topo_lock);
            DBG("mp_kdp_enter() race lost\n");
            mp_kdp_wait(TRUE, FALSE);
            locked = FALSE;
        }
    }

    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
    }

    debugger_cpu = my_cpu;
    ncpus = 1;
    atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
    mp_kdp_trap = TRUE;
    debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;

    /*
     * Deliver a nudge to other cpus, counting how many
     */
    DBG("mp_kdp_enter() signaling other processors\n");
    if (force_immediate_debugger_NMI == FALSE) {
        for (cpu = 0; cpu < real_ncpus; cpu++) {
            if (cpu == my_cpu || !cpu_is_running(cpu)) {
                continue;
            }
            ncpus++;
            i386_signal_cpu(cpu, MP_KDP, ASYNC);
        }
        /*
         * Wait other processors to synchronize
         */
        DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

        /*
         * This timeout is rather arbitrary; we don't want to NMI
         * processors that are executing at potentially
         * "unsafe-to-interrupt" points such as the trampolines,
         * but neither do we want to lose state by waiting too long.
         */
        tsc_timeout = rdtsc64() + (LockTimeOutTSC);

        while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
            /*
             * A TLB shootdown request may be pending--this would
             * result in the requesting processor waiting in
             * PMAP_UPDATE_TLBS() until this processor deals with it.
             * Process it, so it can now enter mp_kdp_wait()
             */
            handle_pending_TLB_flushes();
            cpu_pause();
        }

        /* If we've timed out, and some processor(s) are still unresponsive,
         * interrupt them with an NMI via the local APIC, iff a panic is
         * in progress.
         */
        if (panic_active()) {
            NMIPI_enable(TRUE);
        }
        if (mp_kdp_ncpus != ncpus) {
            unsigned int wait_cycles = 0;
            if (proceed_on_failure) {
                paniclog_append_noflush("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
            } else {
                DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
            }
            for (cpu = 0; cpu < real_ncpus; cpu++) {
                if (cpu == my_cpu || !cpu_is_running(cpu)) {
                    continue;
                }
                if (cpu_signal_pending(cpu, MP_KDP)) {
                    cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
                    cpu_NMI_interrupt(cpu);
                }
            }
            /* Wait again for the same timeout */
            tsc_timeout = rdtsc64() + (LockTimeOutTSC);
            while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
                handle_pending_TLB_flushes();
                cpu_pause();
                wait_cycles++;
            }
            if (mp_kdp_ncpus != ncpus) {
                paniclog_append_noflush("mp_kdp_enter() NMI pending on cpus:");
                for (cpu = 0; cpu < real_ncpus; cpu++) {
                    if (cpu_is_running(cpu) && !cpu_datap(cpu)->cpu_NMI_acknowledged) {
                        paniclog_append_noflush(" %d", cpu);
                    }
                }
                paniclog_append_noflush("\n");
                if (proceed_on_failure) {
                    paniclog_append_noflush("mp_kdp_enter() timed-out during %s wait after NMI;"
                        "expected %u acks but received %lu after %u loops in %llu ticks\n",
                        (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
                } else {
                    panic("mp_kdp_enter() timed-out during %s wait after NMI;"
                        "expected %u acks but received %lu after %u loops in %llu ticks",
                        (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
                }
            }
        }
    } else {
        for (cpu = 0; cpu < real_ncpus; cpu++) {
            if (cpu == my_cpu || !cpu_is_running(cpu)) {
                continue;
            }
            cpu_NMI_interrupt(cpu);
        }
    }

    if (locked) {
        simple_unlock(&x86_topo_lock);
    }

    DBG("mp_kdp_enter() %d processors done %s\n",
        (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");

    postcode(MP_KDP_ENTER);
}
boolean_t
mp_kdp_all_cpus_halted()
{
    unsigned int ncpus = 0, cpu = 0, my_cpu = 0;

    my_cpu = cpu_number();
    ncpus = 1; /* current CPU */
    for (cpu = 0; cpu < real_ncpus; cpu++) {
        if (cpu == my_cpu || !cpu_is_running(cpu)) {
            continue;
        }
        ncpus++;
    }

    return mp_kdp_ncpus == ncpus;
}

static boolean_t
cpu_signal_pending(int cpu, mp_event_t event)
{
    volatile int    *signals = &cpu_datap(cpu)->cpu_signals;
    boolean_t       retval = FALSE;

    if (i_bit(event, signals)) {
        retval = TRUE;
    }
    return retval;
}

long
kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
    void *arg0, void *arg1)
{
    if (lcpu > (real_ncpus - 1)) {
        return -1;
    }

    kdp_xcpu_call_func.func = func;
    kdp_xcpu_call_func.ret  = -1;
    kdp_xcpu_call_func.arg0 = arg0;
    kdp_xcpu_call_func.arg1 = arg1;
    kdp_xcpu_call_func.cpu  = lcpu;
    DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
    while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE) {
        cpu_pause();
    }
    return kdp_xcpu_call_func.ret;
}

static void
kdp_x86_xcpu_poll(void)
{
    if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
        kdp_xcpu_call_func.ret =
            kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
            kdp_xcpu_call_func.arg1,
            cpu_number());
        kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
    }
}

static void
mp_kdp_wait(boolean_t flush, boolean_t isNMI)
{
    DBG("mp_kdp_wait()\n");

    current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
#if CONFIG_MCA
    /* If we've trapped due to a machine-check, save MCA registers */
    mca_check_save();
#endif

    atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
    while (mp_kdp_trap || (isNMI == TRUE)) {
        /*
         * A TLB shootdown request may be pending--this would result
         * in the requesting processor waiting in PMAP_UPDATE_TLBS()
         * until this processor handles it.
         * Process it, so it can now enter mp_kdp_wait()
         */
        if (flush) {
            handle_pending_TLB_flushes();
        }

        kdp_x86_xcpu_poll();
        cpu_pause();
    }

    atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
    DBG("mp_kdp_wait() done\n");
}

void
mp_kdp_exit(void)
{
    DBG("mp_kdp_exit()\n");
    debugger_cpu = -1;
    atomic_decl((volatile long *)&mp_kdp_ncpus, 1);

    debugger_exit_time = mach_absolute_time();

    mp_kdp_trap = FALSE;
    mfence();

    /* Wait other processors to stop spinning. XXX needs timeout */
    DBG("mp_kdp_exit() waiting for processors to resume\n");
    while (mp_kdp_ncpus > 0) {
        /*
         * a TLB shootdown request may be pending... this would result in the requesting
         * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
         * Process it, so it can now enter mp_kdp_wait()
         */
        handle_pending_TLB_flushes();

        cpu_pause();
    }

    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
    }

    debugger_exit_time = mach_absolute_time();

    DBG("mp_kdp_exit() done\n");
    (void) ml_set_interrupts_enabled(mp_kdp_state);
    postcode(MP_KDP_EXIT);
}

#endif /* MACH_KDP */

boolean_t
mp_recent_debugger_activity(void)
{
    uint64_t abstime = mach_absolute_time();
    return ((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
           ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance);
}
	__unused processor_t processor)

void
cause_ast_check(
    processor_t processor)
{
    int cpu = processor->cpu_id;

    if (cpu != cpu_number()) {
        i386_signal_cpu(cpu, MP_AST, ASYNC);
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
    }
}

void
slave_machine_init(void *param)
{
    /*
     * Here in process context, but with interrupts disabled.
     */
    DBG("slave_machine_init() CPU%d\n", get_cpu_number());

    if (param == FULL_SLAVE_INIT) {
        /*
         * Cold start
         */
        clock_init();
    }
    cpu_machine_init();     /* Interrupts enabled hereafter */
}

int
cpu_number(void)
{
    return get_cpu_number();
}

vm_offset_t
current_percpu_base(void)
{
    return get_current_percpu_base();
}

static void
cpu_prewarm_init(void)
{
    int i;

    simple_lock_init(&cpu_warm_lock, 0);
    queue_init(&cpu_warm_call_list);
    for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
        enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
    }
}

static timer_call_t
grab_warm_timer_call(void)
{
    spl_t x;
    timer_call_t call = NULL;

    x = splsched();
    simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
    if (!queue_empty(&cpu_warm_call_list)) {
        call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
    }
    simple_unlock(&cpu_warm_lock);
    splx(x);

    return call;
}

static void
free_warm_timer_call(timer_call_t call)
{
    spl_t x;

    x = splsched();
    simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
    enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
    simple_unlock(&cpu_warm_lock);
    splx(x);
}

/*
 * Runs in timer call context (interrupts disabled).
 */
static void
cpu_warm_timer_call_func(
    timer_call_param_t p0,
    __unused timer_call_param_t p1)
{
    free_warm_timer_call((timer_call_t)p0);
}

/*
 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
 */
static void
_cpu_warm_setup(
    void *arg)
{
    cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;

    timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
    cwdp->cwd_result = 0;
}

/*
 * Not safe to call with interrupts disabled.
 */
kern_return_t
ml_interrupt_prewarm(
    uint64_t        deadline)
{
    struct cpu_warm_data cwd;
    timer_call_t call;
    cpu_t ct;

    if (ml_get_interrupts_enabled() == FALSE) {
        panic("%s: Interrupts disabled?\n", __FUNCTION__);
    }

    /*
     * If the platform doesn't need our help, say that we succeeded.
     */
    if (!ml_get_interrupt_prewake_applicable()) {
        return KERN_SUCCESS;
    }

    /*
     * Grab a timer call to use.
     */
    call = grab_warm_timer_call();
    if (call == NULL) {
        return KERN_RESOURCE_SHORTAGE;
    }

    timer_call_setup(call, cpu_warm_timer_call_func, call);
    cwd.cwd_call = call;
    cwd.cwd_deadline = deadline;
    cwd.cwd_result = 0;

    /*
     * For now, non-local interrupts happen on the master processor.
     */
    ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
    if (ct == 0) {
        free_warm_timer_call(call);
        return KERN_FAILURE;
    } else {
        return cwd.cwd_result;
    }
}
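/*
 * Illustrative sketch (not part of the original source): a caller passes an
 * absolute deadline and treats anything other than KERN_SUCCESS as "no
 * prewarm armed".  The 10ms lead time and the helper name are arbitrary
 * examples, not taken from this file.
 */
#if 0   /* example only */
static void
example_prewarm_caller(uint64_t wake_deadline_abs)
{
    uint64_t lead;
    kern_return_t kr;

    nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &lead);
    kr = ml_interrupt_prewarm(wake_deadline_abs - lead);
    if (kr != KERN_SUCCESS) {
        /* Platform doesn't need help, or no timer call was available. */
    }
}
#endif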
#if DEBUG || DEVELOPMENT
void
kernel_spin(uint64_t spin_ns)
{
    boolean_t       istate;
    uint64_t        spin_abs;
    uint64_t        deadline;
    cpu_data_t      *cdp;

    kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
    istate = ml_set_interrupts_enabled(FALSE);
    cdp = current_cpu_datap();
    nanoseconds_to_absolutetime(spin_ns, &spin_abs);

    /* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
    cdp->cpu_int_event_time = mach_absolute_time();
    cdp->cpu_int_state = (void *) USER_STATE(current_thread());

    deadline = mach_absolute_time() + spin_abs;
    while (mach_absolute_time() < deadline) {
        cpu_pause();
    }

    cdp->cpu_int_event_time = 0;
    cdp->cpu_int_state = NULL;

    ml_set_interrupts_enabled(istate);
    kprintf("kernel_spin() continuing\n");
}
/*
 * Called from the scheduler's maintenance thread,
 * scan running processors for long-running ISRs and:
 *  - panic if longer than LockTimeOut, or
 *  - log if more than a quantum.
 */
void
mp_interrupt_watchdog(void)
{
    cpu_t                   cpu;
    boolean_t               intrs_enabled = FALSE;
    uint16_t                cpu_int_num;
    uint64_t                cpu_int_event_time;
    uint64_t                cpu_rip;
    uint64_t                cpu_int_duration;
    uint64_t                now;
    x86_saved_state_t       *cpu_int_state;

    if (__improbable(!mp_interrupt_watchdog_enabled)) {
        return;
    }

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    now = mach_absolute_time();
    /*
     * While timeouts are not suspended,
     * check all other processors for long outstanding interrupt handling.
     */
    for (cpu = 0;
        cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
        cpu++) {
        if ((cpu == (cpu_t) cpu_number()) ||
            (!cpu_is_running(cpu))) {
            continue;
        }
        cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
        if (cpu_int_event_time == 0) {
            continue;
        }
        if (__improbable(now < cpu_int_event_time)) {
            continue;       /* skip due to inter-processor skew */
        }
        cpu_int_state = cpu_datap(cpu)->cpu_int_state;
        if (__improbable(cpu_int_state == NULL)) {
            /* The interrupt may have been dismissed */
            continue;
        }

        /* Here with a cpu handling an interrupt */

        cpu_int_duration = now - cpu_int_event_time;
        if (__improbable(cpu_int_duration > LockTimeOut)) {
            cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
            cpu_rip = saved_state64(cpu_int_state)->isf.rip;
            vector_timed_out = cpu_int_num;
            NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
            panic("Interrupt watchdog, "
                "cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
                cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
            /* NOT REACHED */
        } else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
            mp_interrupt_watchdog_events++;
            cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
            cpu_rip = saved_state64(cpu_int_state)->isf.rip;
            ml_set_interrupts_enabled(intrs_enabled);
            printf("Interrupt watchdog, "
                "cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
                cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
            return;
        }
    }

    ml_set_interrupts_enabled(intrs_enabled);
}
#endif /* DEBUG || DEVELOPMENT */