/*
 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kdp/kdp_internal.h>
#include <mach_ldebug.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/lock_group.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/timer_call.h>
#include <kern/kalloc.h>
#include <kern/queue.h>
#include <prng/random.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <i386/bit_routines.h>
#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/machine_cpu.h>
#include <i386/pmCPU.h>
#include <i386/machine_check.h>
#include <i386/acpi.h>

#include <sys/kdebug.h>

#include <console/serial_protos.h>

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */

#define PAUSE           delay(1000000)
#define DBG(x...)       kprintf(x)

/* Debugging/test trace events: */
#define TRACE_MP_TLB_FLUSH              MACHDBG_CODE(DBG_MACH_MP, 0)
#define TRACE_MP_CPUS_CALL              MACHDBG_CODE(DBG_MACH_MP, 1)
#define TRACE_MP_CPUS_CALL_LOCAL        MACHDBG_CODE(DBG_MACH_MP, 2)
#define TRACE_MP_CPUS_CALL_ACTION       MACHDBG_CODE(DBG_MACH_MP, 3)
#define TRACE_MP_CPUS_CALL_NOBUF        MACHDBG_CODE(DBG_MACH_MP, 4)
#define TRACE_MP_CPU_FAST_START         MACHDBG_CODE(DBG_MACH_MP, 5)
#define TRACE_MP_CPU_START              MACHDBG_CODE(DBG_MACH_MP, 6)
#define TRACE_MP_CPU_DEACTIVATE         MACHDBG_CODE(DBG_MACH_MP, 7)

#define ABS(v)          (((v) > 0)?(v):-(v))
void            slave_boot_init(void);
void            i386_cpu_IPI(int cpu);

#if MACH_KDP
static void     mp_kdp_wait(boolean_t flush, boolean_t isNMI);
static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
#endif /* MACH_KDP */
static int      NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t               smp_initialized = FALSE;
uint32_t                TSC_sync_margin = 0xFFF;
volatile boolean_t      force_immediate_debugger_NMI = FALSE;
volatile boolean_t      pmap_tlb_flush_timeout = FALSE;
#if DEBUG || DEVELOPMENT
boolean_t               mp_interrupt_watchdog_enabled = TRUE;
uint32_t                mp_interrupt_watchdog_events = 0;
#endif

decl_simple_lock_data(, debugger_callback_lock);
struct debugger_callback *debugger_callback = NULL;

decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t           mp_cpu_boot_lock_ext;

/* Variables needed for MP rendezvous. */
decl_simple_lock_data(, mp_rv_lock);
static void     (*mp_rv_setup_func)(void *arg);
static void     (*mp_rv_action_func)(void *arg);
static void     (*mp_rv_teardown_func)(void *arg);
static void     *mp_rv_func_arg;
static volatile int     mp_rv_ncpus;
/* Cache-aligned barriers: */
static volatile long    mp_rv_entry    __attribute__((aligned(64)));
static volatile long    mp_rv_exit     __attribute__((aligned(64)));
static volatile long    mp_rv_complete __attribute__((aligned(64)));

volatile uint64_t       debugger_entry_time;
volatile uint64_t       debugger_exit_time;

extern int kdp_snapshot;
static struct _kdp_xcpu_call_func {
    kdp_x86_xcpu_func_t func;
    void                *arg0, *arg1;
    volatile long       ret;
    volatile uint16_t   cpu;
} kdp_xcpu_call_func = {
    .cpu = KDP_XCPU_NONE
};
/* Variables needed for MP broadcast. */
static void     (*mp_bc_action_func)(void *arg);
static void     *mp_bc_func_arg;
static int      mp_bc_ncpus;
static volatile long    mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t   mp_bc_lock_ext;
static volatile int     debugger_cpu = -1;
volatile long   NMIPI_acks = 0;
volatile long   NMI_count = 0;
static NMI_reason_t     NMI_panic_reason = NONE;
static int              vector_timed_out;

extern void     NMI_cpus(void);

static void     mp_cpus_call_init(void);
static void     mp_cpus_call_action(void);
static void     mp_call_PM(void);

char            mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init

/* PAL-related routines */
boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
    int ipi_vector, i386_intr_func_t ipi_handler);
void i386_start_cpu(int lapic_id, int cpu_num);
void i386_send_NMI(int cpu);
void NMIPI_enable(boolean_t);

static lck_grp_t        smp_lck_grp;
static lck_grp_attr_t   smp_lck_grp_attr;

#define NUM_CPU_WARM_CALLS      20
struct timer_call       cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
queue_head_t            cpu_warm_call_list;
decl_simple_lock_data(static, cpu_warm_lock);

typedef struct cpu_warm_data {
    timer_call_t    cwd_call;
    uint64_t        cwd_deadline;
    int             cwd_result;
} *cpu_warm_data_t;

static void             cpu_prewarm_init(void);
static void             cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
static void             _cpu_warm_setup(void *arg);
static timer_call_t     grab_warm_timer_call(void);
static void             free_warm_timer_call(timer_call_t call);
void
smp_init(void)
{
    simple_lock_init(&mp_rv_lock, 0);
    simple_lock_init(&debugger_callback_lock, 0);
    lck_grp_attr_setdefault(&smp_lck_grp_attr);
    lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
    lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
    lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);

    if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
        LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler)) {
        return;
    }

    DBGLOG_CPU_INIT(master_cpu);

    mp_cpus_call_init();
    mp_cpus_call_cpu_init(master_cpu);

#if DEBUG || DEVELOPMENT
    if (PE_parse_boot_argn("interrupt_watchdog",
        &mp_interrupt_watchdog_enabled,
        sizeof(mp_interrupt_watchdog_enabled))) {
        kprintf("Interrupt watchdog %sabled\n",
            mp_interrupt_watchdog_enabled ? "en" : "dis");
    }
#endif

    if (PE_parse_boot_argn("TSC_sync_margin",
        &TSC_sync_margin, sizeof(TSC_sync_margin))) {
        kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
    } else if (cpuid_vmm_present()) {
        kprintf("TSC sync margin disabled\n");
        TSC_sync_margin = 0;
    }
    smp_initialized = TRUE;

    cpu_prewarm_init();
}

typedef struct {
    int     starter_cpu;
    int     target_cpu;
    int     target_lapic;
} processor_start_info_t;
static processor_start_info_t   start_info        __attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long            tsc_entry_barrier __attribute__((aligned(64)));
static volatile long            tsc_exit_barrier  __attribute__((aligned(64)));
static volatile uint64_t        tsc_target        __attribute__((aligned(64)));

/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
    while (iters-- > 0) {
        if (cpu_datap(slot_num)->cpu_running) {
            break;
        }
        delay(usecdelay);
    }
}
/*
 * Quickly bring a CPU back online which has been halted.
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
    kern_return_t   rc;

    /*
     * Try to perform a fast restart
     */
    rc = pmCPUExitHalt(slot_num);
    if (rc != KERN_SUCCESS) {
        /*
         * The CPU was not eligible for a fast restart.
         */
        return rc;
    }

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
        slot_num, 0, 0, 0, 0);

    /*
     * Wait until the CPU is back online.
     */
    mp_disable_preemption();

    /*
     * We use short pauses (1us) for low latency. 30,000 iterations is
     * longer than a full restart would require so it should be more
     * than long enough.
     */
    mp_wait_for_cpu_up(slot_num, 30000, 1);
    mp_enable_preemption();

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
        slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);

    /*
     * Check to make sure that the CPU is really running. If not,
     * go through the slow path.
     */
    if (cpu_datap(slot_num)->cpu_running) {
        return KERN_SUCCESS;
    } else {
        return KERN_FAILURE;
    }
}

void
started_cpu(void)
{
    /* Here on the started cpu with cpu_running set TRUE */

    if (TSC_sync_margin &&
        start_info.target_cpu == cpu_number()) {
        /*
         * I've just started-up, synchronize again with the starter cpu
         * and then snap my TSC.
         */
        atomic_decl(&tsc_entry_barrier, 1);
        while (tsc_entry_barrier != 0) {
            ;       /* spin for starter and target at barrier */
        }
        tsc_target = rdtsc64();
        atomic_decl(&tsc_exit_barrier, 1);
    }
}
static void
start_cpu(void *arg)
{
    int                     i = 1000;
    processor_start_info_t  *psip = (processor_start_info_t *) arg;

    /* Ignore this if the current processor is not the starter */
    if (cpu_number() != psip->starter_cpu) {
        return;
    }

    DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
        arg, psip->target_cpu, psip->target_lapic);

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_START | DBG_FUNC_START,
        psip->target_cpu,
        psip->target_lapic, 0, 0, 0);

    i386_start_cpu(psip->target_lapic, psip->target_cpu);

#ifdef POSTCODE_DELAY
    /* Wait much longer if postcodes are displayed for a delay period. */
    i *= 10000;
#endif
    DBG("start_cpu(%p) about to wait for cpu %d\n",
        arg, psip->target_cpu);

    mp_wait_for_cpu_up(psip->target_cpu, i * 100, 100);

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_START | DBG_FUNC_END,
        psip->target_cpu,
        cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);

    if (TSC_sync_margin &&
        cpu_datap(psip->target_cpu)->cpu_running) {
        /*
         * Compare the TSC from the started processor with ours.
         * Report and log/panic if it diverges by more than
         * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
         * can be overridden by boot-arg (with 0 meaning no checking).
         */
        uint64_t        tsc_starter;
        int64_t         tsc_delta;

        atomic_decl(&tsc_entry_barrier, 1);
        while (tsc_entry_barrier != 0) {
            ;       /* spin for both processors at barrier */
        }
        tsc_starter = rdtsc64();
        atomic_decl(&tsc_exit_barrier, 1);
        while (tsc_exit_barrier != 0) {
            ;       /* spin for target to store its TSC */
        }
        tsc_delta = tsc_target - tsc_starter;
        kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
            psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
        if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
            panic(
#else
            printf(
#endif
                "Unsynchronized TSC for cpu %d: "
                "0x%016llx, delta 0x%llx\n",
                psip->target_cpu, tsc_target, tsc_delta);
        }
    }
}
kern_return_t
intel_startCPU(
    int slot_num)
{
    int             lapic = cpu_to_lapic[slot_num];
    boolean_t       istate;

    DBGLOG_CPU_INIT(slot_num);

    DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
    DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);

    /*
     * Initialize (or re-initialize) the descriptor tables for this cpu.
     * Propagate processor mode to slave.
     */
    cpu_desc_init(cpu_datap(slot_num));

    /* Serialize use of the slave boot stack, etc. */
    lck_mtx_lock(&mp_cpu_boot_lock);

    istate = ml_set_interrupts_enabled(FALSE);
    if (slot_num == get_cpu_number()) {
        ml_set_interrupts_enabled(istate);
        lck_mtx_unlock(&mp_cpu_boot_lock);
        return KERN_SUCCESS;
    }

    start_info.starter_cpu  = cpu_number();
    start_info.target_cpu   = slot_num;
    start_info.target_lapic = lapic;
    tsc_entry_barrier = 2;
    tsc_exit_barrier = 2;

    /*
     * Perform the processor startup sequence with all running
     * processors rendezvous'ed. This is required during periods when
     * the cache-disable bit is set for MTRR/PAT initialization.
     */
    mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

    start_info.target_cpu = 0;

    ml_set_interrupts_enabled(istate);
    lck_mtx_unlock(&mp_cpu_boot_lock);

    if (!cpu_datap(slot_num)->cpu_running) {
        kprintf("Failed to start CPU %02d\n", slot_num);
        printf("Failed to start CPU %02d, rebooting...\n", slot_num);
        delay(1000000);
        halt_cpu();
        return KERN_SUCCESS;
    } else {
        kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
        return KERN_SUCCESS;
    }
}

#if MP_DEBUG
cpu_signal_event_log_t  *cpu_signal[MAX_CPUS];
cpu_signal_event_log_t  *cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif /* MP_DEBUG */
/*
 * Note: called with NULL state when polling for TLB flush and cross-calls.
 */
int
cpu_signal_handler(x86_saved_state_t *regs)
{
#if !MACH_KDP
#pragma unused (regs)
#endif /* !MACH_KDP */
    int             my_cpu;
    volatile int    *my_word;

    SCHED_STATS_IPI(current_processor());

    my_cpu = cpu_number();
    my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
    /* Store the initial set of signals for diagnostics. New
     * signals could arrive while these are being processed
     * so it's no more than a hint.
     */

    cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;

    do {
#if MACH_KDP
        if (i_bit(MP_KDP, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_KDP);
            i_bit_clear(MP_KDP, my_word);
            /* Ensure that the i386_kernel_state at the base of the
             * current thread's stack (if any) is synchronized with the
             * context at the moment of the interrupt, to facilitate
             * access through the debugger.
             */
            sync_iss_to_iks(regs);
            if (pmsafe_debug && !kdp_snapshot) {
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
            }
            mp_kdp_wait(TRUE, FALSE);
            if (pmsafe_debug && !kdp_snapshot) {
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
            }
        } else
#endif /* MACH_KDP */
        if (i_bit(MP_TLB_FLUSH, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_TLB_FLUSH);
            i_bit_clear(MP_TLB_FLUSH, my_word);
            pmap_update_interrupt();
        } else if (i_bit(MP_CALL, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_CALL);
            i_bit_clear(MP_CALL, my_word);
            mp_cpus_call_action();
        } else if (i_bit(MP_CALL_PM, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_CALL_PM);
            i_bit_clear(MP_CALL_PM, my_word);
            mp_call_PM();
        }
        if (regs == NULL) {
            /* Called to poll only for cross-calls and TLB flush */
            break;
        } else if (i_bit(MP_AST, my_word)) {
            DBGLOG(cpu_handle, my_cpu, MP_AST);
            i_bit_clear(MP_AST, my_word);
            ast_check(cpu_to_processor(my_cpu));
        }
    } while (*my_word);

    return 0;
}
extern void kprintf_break_lock(void);

int
NMIInterruptHandler(x86_saved_state_t *regs)
{
    void        *stackptr;
    char        pstr[256];
    uint64_t    now = mach_absolute_time();

    if (panic_active() && !panicDebugging) {
        if (pmsafe_debug) {
            pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
        }
        for (;;) {
            cpu_pause();
        }
    }

    atomic_incl(&NMIPI_acks, 1);
    atomic_incl(&NMI_count, 1);
    sync_iss_to_iks_unconditionally(regs);
    __asm__ volatile ("movq %%rbp, %0" : "=m" (stackptr));

    if (cpu_number() == debugger_cpu) {
        goto NMExit;
    }

    if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
            cpu_number(), now, spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    } else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
            cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
        panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
    } else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
            cpu_number(), now);
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    } else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
        snprintf(&pstr[0], sizeof(pstr),
            "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
            cpu_number(), now, vector_timed_out);
        panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
    }

    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
    }
    current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
    i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
    if (panic_active() || NMI_panic_reason != NONE) {
        mp_kdp_wait(FALSE, TRUE);
    } else if (!mp_kdp_trap &&
        !mp_kdp_is_NMI &&
        virtualized && (debug_boot_arg & DB_NMI)) {
        /*
         * Under a VMM with the debug boot-arg set, drop into kdp.
         * Since an NMI is involved, there's a risk of contending with
         * a panic. And side-effects of NMIs may result in entry into,
         * and continuing from, the debugger being unreliable.
         */
        if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
            kprintf_break_lock();
            kprintf("Debugger entry requested by NMI\n");
            kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
            printf("Debugger entry requested by NMI\n");
            mp_kdp_is_NMI = FALSE;
        } else {
            mp_kdp_wait(FALSE, FALSE);
        }
    } else {
        mp_kdp_wait(FALSE, FALSE);
    }
    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
    }
NMExit:
    return 1;
}
/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI. If an
 * IPI is always needed then use i386_cpu_IPI.
 */
void
cpu_interrupt(int cpu)
{
    boolean_t did_IPI = FALSE;

    if (smp_initialized
        && pmCPUExitIdle(cpu_datap(cpu))) {
        i386_cpu_IPI(cpu);
        did_IPI = TRUE;
    }

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
}

/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
void
cpu_NMI_interrupt(int cpu)
{
    if (smp_initialized) {
        i386_send_NMI(cpu);
    }
}

void
NMI_cpus(void)
{
    unsigned int    cpu;
    boolean_t       intrs_enabled;
    uint64_t        tsc_timeout;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);

    for (cpu = 0; cpu < real_ncpus; cpu++) {
        if (!cpu_is_running(cpu)) {
            continue;
        }
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
        cpu_NMI_interrupt(cpu);
        tsc_timeout = !machine_timeout_suspended() ?
            rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
            ~0ULL;
        while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
            handle_pending_TLB_flushes();
            cpu_pause();
            if (rdtsc64() > tsc_timeout) {
                panic("NMI_cpus() timeout cpu %d", cpu);
            }
        }
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
    }

    ml_set_interrupts_enabled(intrs_enabled);
}
static void (*volatile mp_PM_func)(void) = NULL;

static void
mp_call_PM(void)
{
    assert(!ml_get_interrupts_enabled());

    if (mp_PM_func != NULL) {
        mp_PM_func();
    }
}

void
cpu_PM_interrupt(int cpu)
{
    assert(!ml_get_interrupts_enabled());

    if (mp_PM_func != NULL) {
        if (cpu == cpu_number()) {
            mp_PM_func();
        } else {
            i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
        }
    }
}

void
PM_interrupt_register(void (*fn)(void))
{
    mp_PM_func = fn;
}

void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
    volatile int    *signals = &cpu_datap(cpu)->cpu_signals;
    uint64_t        tsc_timeout;

    if (!cpu_datap(cpu)->cpu_running) {
        return;
    }

    if (event == MP_TLB_FLUSH) {
        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
    }

    DBGLOG(cpu_signal, cpu, event);

    i_bit_set(event, signals);
    i386_cpu_IPI(cpu);
    if (mode == SYNC) {
again:
        tsc_timeout = !machine_timeout_suspended() ?
            rdtsc64() + (1000 * 1000 * 1000) :
            ~0ULL;
        while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
            cpu_pause();
        }
        if (i_bit(event, signals)) {
            DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
                cpu, event);
            goto again;
        }
    }
    if (event == MP_TLB_FLUSH) {
        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
    }
}
/*
 * Helper function called when busy-waiting: panic if too long
 * a TSC-based time has elapsed since the start of the spin.
 */
static boolean_t
mp_spin_timeout(uint64_t tsc_start)
{
    uint64_t        tsc_timeout;

    cpu_pause();
    if (machine_timeout_suspended()) {
        return FALSE;
    }

    /*
     * The timeout is 4 * the spinlock timeout period
     * unless we have serial console printing (kprintf) enabled
     * in which case we allow an even greater margin.
     */
    tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
        : LockTimeOutTSC << 4;
    return rdtsc64() > tsc_start + tsc_timeout;
}
/*
 * Helper function to take a spinlock while ensuring that incoming IPIs
 * are still serviced if interrupts are masked while we spin.
 * Returns current interrupt state.
 */
boolean_t
mp_safe_spin_lock(usimple_lock_t lock)
{
    if (ml_get_interrupts_enabled()) {
        simple_lock(lock, LCK_GRP_NULL);
        return TRUE;
    } else {
        uint64_t tsc_spin_start = rdtsc64();

        while (!simple_lock_try(lock, LCK_GRP_NULL)) {
            cpu_signal_handler(NULL);
            if (mp_spin_timeout(tsc_spin_start)) {
                uint32_t lock_cpu;
                uintptr_t lowner = (uintptr_t)
                    lock->interlock.lock_data;
                spinlock_timed_out = lock;
                lock_cpu = spinlock_timeout_NMI(lowner);
                NMIPI_panic(cpu_to_cpumask(lock_cpu), SPINLOCK_TIMEOUT);
                panic("mp_safe_spin_lock() timed out, lock: %p, owner thread: 0x%lx, current_thread: %p, owner on CPU 0x%x, time: %llu",
                    lock, lowner, current_thread(), lock_cpu, mach_absolute_time());
            }
        }
        return FALSE;
    }
}
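
/*
 * Illustrative sketch (not part of the original file): taking a spinlock
 * with mp_safe_spin_lock() so that cross-calls and TLB flushes are still
 * serviced while spinning with interrupts masked. 'example_lock' is a
 * hypothetical lock and must be simple_lock_init'd before first use.
 */
#if 0 /* example only */
decl_simple_lock_data(static, example_lock);

static void
example_locked_update(void)
{
    boolean_t intrs = ml_set_interrupts_enabled(FALSE);

    (void) mp_safe_spin_lock(&example_lock);
    /* ... critical section ... */
    simple_unlock(&example_lock);

    ml_set_interrupts_enabled(intrs);
}
#endif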
/*
 * All-CPU rendezvous:
 *      - CPUs are signalled,
 *      - all execute the setup function (if specified),
 *      - rendezvous (i.e. all cpus reach a barrier),
 *      - all execute the action function (if specified),
 *      - rendezvous again,
 *      - execute the teardown function (if specified), and then
 *      - resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */

static void
mp_rendezvous_action(__unused void *null)
{
    boolean_t       intrs_enabled;
    uint64_t        tsc_spin_start;

    /*
     * Note that mp_rv_lock was acquired by the thread that initiated the
     * rendezvous and must have been acquired before we enter
     * mp_rendezvous_action().
     */
    current_cpu_datap()->cpu_rendezvous_in_progress = TRUE;

    /* setup function */
    if (mp_rv_setup_func != NULL) {
        mp_rv_setup_func(mp_rv_func_arg);
    }

    intrs_enabled = ml_get_interrupts_enabled();

    /* spin on entry rendezvous */
    atomic_incl(&mp_rv_entry, 1);
    tsc_spin_start = rdtsc64();

    while (mp_rv_entry < mp_rv_ncpus) {
        /* poll for pesky tlb flushes if interrupts disabled */
        if (!intrs_enabled) {
            handle_pending_TLB_flushes();
        }
        if (mp_spin_timeout(tsc_spin_start)) {
            panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
        }
    }

    /* action function */
    if (mp_rv_action_func != NULL) {
        mp_rv_action_func(mp_rv_func_arg);
    }

    /* spin on exit rendezvous */
    atomic_incl(&mp_rv_exit, 1);
    tsc_spin_start = rdtsc64();
    while (mp_rv_exit < mp_rv_ncpus) {
        if (!intrs_enabled) {
            handle_pending_TLB_flushes();
        }
        if (mp_spin_timeout(tsc_spin_start)) {
            panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
        }
    }

    /* teardown function */
    if (mp_rv_teardown_func != NULL) {
        mp_rv_teardown_func(mp_rv_func_arg);
    }

    current_cpu_datap()->cpu_rendezvous_in_progress = FALSE;

    /* Bump completion count */
    atomic_incl(&mp_rv_complete, 1);
}

void
mp_rendezvous(void (*setup_func)(void *),
    void (*action_func)(void *),
    void (*teardown_func)(void *),
    void *arg)
{
    uint64_t tsc_spin_start;

    if (!smp_initialized) {
        if (setup_func != NULL) {
            setup_func(arg);
        }
        if (action_func != NULL) {
            action_func(arg);
        }
        if (teardown_func != NULL) {
            teardown_func(arg);
        }
        return;
    }

    /* obtain rendezvous lock */
    mp_rendezvous_lock();

    /* set static function pointers */
    mp_rv_setup_func = setup_func;
    mp_rv_action_func = action_func;
    mp_rv_teardown_func = teardown_func;
    mp_rv_func_arg = arg;

    mp_rv_entry    = 0;
    mp_rv_exit     = 0;
    mp_rv_complete = 0;

    /*
     * signal other processors, which will call mp_rendezvous_action()
     * with interrupts disabled
     */
    mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;

    /* call executor function on this cpu */
    mp_rendezvous_action(NULL);

    /*
     * Spin for everyone to complete.
     * This is necessary to ensure that all processors have proceeded
     * from the exit barrier before we release the rendezvous structure.
     */
    tsc_spin_start = rdtsc64();
    while (mp_rv_complete < mp_rv_ncpus) {
        if (mp_spin_timeout(tsc_spin_start)) {
            panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
        }
    }

    /* Tidy up */
    mp_rv_setup_func = NULL;
    mp_rv_action_func = NULL;
    mp_rv_teardown_func = NULL;
    mp_rv_func_arg = NULL;

    /* release lock */
    mp_rendezvous_unlock();
}

void
mp_rendezvous_lock(void)
{
    (void) mp_safe_spin_lock(&mp_rv_lock);
}

void
mp_rendezvous_unlock(void)
{
    simple_unlock(&mp_rv_lock);
}

void
mp_rendezvous_break_lock(void)
{
    simple_lock_init(&mp_rv_lock, 0);
}
static void
setup_disable_intrs(__unused void * param_not_used)
{
    /* disable interrupts before the first barrier */
    boolean_t intr = ml_set_interrupts_enabled(FALSE);

    current_cpu_datap()->cpu_iflag = intr;
    DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

static void
teardown_restore_intrs(__unused void * param_not_used)
{
    /* restore interrupt flag following MTRR changes */
    ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
    DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
void
mp_rendezvous_no_intrs(
    void (*action_func)(void *),
    void *arg)
{
    mp_rendezvous(setup_disable_intrs,
        action_func,
        teardown_restore_intrs,
        arg);
}
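
/*
 * Illustrative sketch (not part of the original file): how a kext-style
 * caller might use mp_rendezvous_no_intrs() to run an action on every
 * running CPU with interrupts disabled. Names here are hypothetical.
 */
#if 0 /* example only */
static void
example_rendezvous_action(__unused void *arg)
{
    /* Runs once on every running CPU, interrupts disabled, in parallel. */
    kprintf("CPU%d reached the rendezvous\n", cpu_number());
}

static void
example_run_everywhere(void)
{
    mp_rendezvous_no_intrs(example_rendezvous_action, NULL);
}
#endif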
typedef struct {
    queue_chain_t   link;                    /* queue linkage */
    void            (*func)(void *, void *); /* routine to call */
    void            *arg0;                   /* routine's 1st arg */
    void            *arg1;                   /* routine's 2nd arg */
    cpumask_t       *maskp;                  /* completion response mask */
} mp_call_t;

typedef struct {
    queue_head_t            queue;
    decl_simple_lock_data(, lock);
} mp_call_queue_t;
#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
static mp_call_queue_t  mp_cpus_call_freelist;
static mp_call_queue_t  mp_cpus_call_head[MAX_CPUS];

static inline boolean_t
mp_call_head_lock(mp_call_queue_t *cqp)
{
    boolean_t       intrs_enabled;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    simple_lock(&cqp->lock, LCK_GRP_NULL);

    return intrs_enabled;
}

/*
 * Deliver an NMIPI to a set of processors to cause them to panic.
 */
void
NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why)
{
    unsigned int    cpu;
    cpumask_t       cpu_bit;
    uint64_t        deadline;

    NMI_panic_reason = why;

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if ((cpu_mask & cpu_bit) == 0) {
            continue;
        }
        cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
        cpu_NMI_interrupt(cpu);
    }

    /* Wait (only so long) for NMI'ed cpus to respond */
    deadline = mach_absolute_time() + LockTimeOut;
    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if ((cpu_mask & cpu_bit) == 0) {
            continue;
        }
        while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
            mach_absolute_time() < deadline) {
            cpu_pause();
        }
    }
}

static inline boolean_t
mp_call_head_is_locked(mp_call_queue_t *cqp)
{
    return !ml_get_interrupts_enabled() &&
           hw_lock_held((hw_lock_t)&cqp->lock);
}

static inline void
mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
{
    simple_unlock(&cqp->lock);
    ml_set_interrupts_enabled(intrs_enabled);
}
static inline mp_call_t *
mp_call_alloc(void)
{
    mp_call_t       *callp = NULL;
    boolean_t       intrs_enabled;
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    intrs_enabled = mp_call_head_lock(cqp);
    if (!queue_empty(&cqp->queue)) {
        queue_remove_first(&cqp->queue, callp, typeof(callp), link);
    }
    mp_call_head_unlock(cqp, intrs_enabled);

    return callp;
}

static void
mp_call_free(mp_call_t *callp)
{
    boolean_t       intrs_enabled;
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    intrs_enabled = mp_call_head_lock(cqp);
    queue_enter_first(&cqp->queue, callp, typeof(callp), link);
    mp_call_head_unlock(cqp, intrs_enabled);
}

static inline mp_call_t *
mp_call_dequeue_locked(mp_call_queue_t *cqp)
{
    mp_call_t       *callp = NULL;

    assert(mp_call_head_is_locked(cqp));
    if (!queue_empty(&cqp->queue)) {
        queue_remove_first(&cqp->queue, callp, typeof(callp), link);
    }

    return callp;
}

static inline void
mp_call_enqueue_locked(
    mp_call_queue_t *cqp,
    mp_call_t       *callp)
{
    queue_enter(&cqp->queue, callp, typeof(callp), link);
}

/* Called on the boot processor to initialize global structures */
static void
mp_cpus_call_init(void)
{
    mp_call_queue_t *cqp = &mp_cpus_call_freelist;

    DBG("mp_cpus_call_init()\n");
    simple_lock_init(&cqp->lock, 0);
    queue_init(&cqp->queue);
}
/*
 * Called at processor registration to add call buffers to the free list
 * and to initialize the per-cpu call queue.
 */
void
mp_cpus_call_cpu_init(int cpu)
{
    int             i;
    mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
    mp_call_t       *callp;

    simple_lock_init(&cqp->lock, 0);
    queue_init(&cqp->queue);
    for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
        callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
        mp_call_free(callp);
    }

    DBG("mp_cpus_call_init(%d) done\n", cpu);
}

/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
 */
static void
mp_cpus_call_action(void)
{
    mp_call_queue_t *cqp;
    boolean_t       intrs_enabled;
    mp_call_t       *callp;
    mp_call_t       call;

    assert(!ml_get_interrupts_enabled());
    cqp = &mp_cpus_call_head[cpu_number()];
    intrs_enabled = mp_call_head_lock(cqp);
    while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
        /* Copy call request to the stack to free buffer */
        call = *callp;
        mp_call_free(callp);
        if (call.func != NULL) {
            mp_call_head_unlock(cqp, intrs_enabled);
            KERNEL_DEBUG_CONSTANT(
                TRACE_MP_CPUS_CALL_ACTION,
                VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
                VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
            call.func(call.arg0, call.arg1);
            (void) mp_call_head_lock(cqp);
        }
        if (call.maskp != NULL) {
            i_bit_set(cpu_number(), call.maskp);
        }
    }
    mp_call_head_unlock(cqp, intrs_enabled);
}
/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * Possible modes are:
 *  SYNC:   function is called serially on target cpus in logical cpu order
 *          waiting for each call to be acknowledged before proceeding
 *  ASYNC:  function call is queued to the specified cpus
 *          waiting for all calls to complete in parallel before returning
 *  NOSYNC: function calls are queued
 *          but we return before confirmation of calls completing.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * The return value is the number of cpus on which the call was made or queued.
 */
cpu_t
mp_cpus_call(
    cpumask_t       cpus,
    mp_sync_t       mode,
    void            (*action_func)(void *),
    void            *arg)
{
    return mp_cpus_call1(
        cpus,
        mode,
        (void (*)(void *, void *))action_func,
        arg,
        NULL,
        NULL);
}
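
/*
 * Illustrative sketch (not part of the original file): invoking a function
 * on a set of CPUs with mp_cpus_call(). The SYNC/ASYNC/NOSYNC semantics are
 * described in the block comment above; the names here are hypothetical.
 */
#if 0 /* example only */
static void
example_call_action(void *arg)
{
    volatile long *counter = (volatile long *) arg;

    /* Runs on each target CPU (from the IPI handler or locally). */
    atomic_incl(counter, 1);
}

static long
example_count_other_cpus(void)
{
    static volatile long counter;
    cpu_t ncalled;

    counter = 0;
    /* Queue on all other CPUs and wait for every call to complete. */
    ncalled = mp_cpus_call(CPUMASK_OTHERS, ASYNC, example_call_action, (void *) &counter);

    return (long) ncalled;
}
#endif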
static void
mp_cpus_call_wait(boolean_t intrs_enabled,
    cpumask_t cpus_called,
    cpumask_t *cpus_responded)
{
    mp_call_queue_t *cqp;
    uint64_t        tsc_spin_start;

    assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
    cqp = &mp_cpus_call_head[cpu_number()];

    tsc_spin_start = rdtsc64();
    while (*cpus_responded != cpus_called) {
        if (!intrs_enabled) {
            /* Sniffing w/o locking */
            if (!queue_empty(&cqp->queue)) {
                mp_cpus_call_action();
            }
            cpu_signal_handler(NULL);
        }
        if (mp_spin_timeout(tsc_spin_start)) {
            cpumask_t cpus_unresponsive;

            cpus_unresponsive = cpus_called & ~(*cpus_responded);
            NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
            panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
                cpus_unresponsive);
        }
    }
}

cpu_t
mp_cpus_call1(
    cpumask_t       cpus,
    mp_sync_t       mode,
    void            (*action_func)(void *, void *),
    void            *arg0,
    void            *arg1,
    cpumask_t       *cpus_calledp)
{
    cpu_t           cpu = 0;
    boolean_t       intrs_enabled = FALSE;
    boolean_t       call_self = FALSE;
    cpumask_t       cpus_called = 0;
    cpumask_t       cpus_responded = 0;
    long            cpus_call_count = 0;
    uint64_t        tsc_spin_start;
    boolean_t       topo_lock;

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPUS_CALL | DBG_FUNC_START,
        cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));

    if (!smp_initialized) {
        if ((cpus & CPUMASK_SELF) == 0) {
            goto out;
        }
        if (action_func != NULL) {
            intrs_enabled = ml_set_interrupts_enabled(FALSE);
            action_func(arg0, arg1);
            ml_set_interrupts_enabled(intrs_enabled);
        }
        call_self = TRUE;
        goto out;
    }

    /*
     * Queue the call for each non-local requested cpu.
     * This is performed under the topo lock to prevent changes to
     * cpus online state and to prevent concurrent rendezvouses --
     * although an exception is made if we're calling only the master
     * processor since that always remains active. Note: this exception
     * is expected for longterm timer nosync cross-calls to the master cpu.
     */
    mp_disable_preemption();
    intrs_enabled = ml_get_interrupts_enabled();
    topo_lock = (cpus != cpu_to_cpumask(master_cpu));
    if (topo_lock) {
        ml_set_interrupts_enabled(FALSE);
        (void) mp_safe_spin_lock(&x86_topo_lock);
    }
    for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
        if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
            !cpu_is_running(cpu)) {
            continue;
        }
        tsc_spin_start = rdtsc64();
        if (cpu == (cpu_t) cpu_number()) {
            /*
             * We don't IPI ourself and if calling asynchronously,
             * we defer our call until we have signalled all others.
             */
            call_self = TRUE;
            if (mode == SYNC && action_func != NULL) {
                KERNEL_DEBUG_CONSTANT(
                    TRACE_MP_CPUS_CALL_LOCAL,
                    VM_KERNEL_UNSLIDE(action_func),
                    VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
                action_func(arg0, arg1);
            }
        } else {
            /*
             * Here to queue a call to cpu and IPI.
             */
            mp_call_t       *callp = NULL;
            mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
            boolean_t       intrs_inner;

queue_call:
            if (callp == NULL) {
                callp = mp_call_alloc();
            }
            intrs_inner = mp_call_head_lock(cqp);
            if (callp == NULL) {
                mp_call_head_unlock(cqp, intrs_inner);
                KERNEL_DEBUG_CONSTANT(
                    TRACE_MP_CPUS_CALL_NOBUF,
                    cpu, 0, 0, 0, 0);
                if (!intrs_inner) {
                    /* Sniffing w/o locking */
                    if (!queue_empty(&cqp->queue)) {
                        mp_cpus_call_action();
                    }
                    handle_pending_TLB_flushes();
                }
                if (mp_spin_timeout(tsc_spin_start)) {
                    panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
                        tsc_spin_start, rdtsc64());
                }
                goto queue_call;
            }
            callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
            callp->func = action_func;
            callp->arg0 = arg0;
            callp->arg1 = arg1;
            mp_call_enqueue_locked(cqp, callp);
            cpus_call_count++;
            cpus_called |= cpu_to_cpumask(cpu);
            i386_signal_cpu(cpu, MP_CALL, ASYNC);
            mp_call_head_unlock(cqp, intrs_inner);
            if (mode == SYNC) {
                mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
            }
        }
    }
    if (topo_lock) {
        simple_unlock(&x86_topo_lock);
        ml_set_interrupts_enabled(intrs_enabled);
    }

    /* Call locally if mode not SYNC */
    if (mode != SYNC && call_self) {
        KERNEL_DEBUG_CONSTANT(
            TRACE_MP_CPUS_CALL_LOCAL,
            VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
        if (action_func != NULL) {
            ml_set_interrupts_enabled(FALSE);
            action_func(arg0, arg1);
            ml_set_interrupts_enabled(intrs_enabled);
        }
    }

    /* For ASYNC, now wait for all signaled cpus to complete their calls */
    if (mode == ASYNC) {
        mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
    }

    /* Safe to allow pre-emption now */
    mp_enable_preemption();

out:
    if (call_self) {
        cpus_called |= cpu_to_cpumask(cpu);
        cpus_call_count++;
    }

    if (cpus_calledp) {
        *cpus_calledp = cpus_called;
    }

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPUS_CALL | DBG_FUNC_END,
        cpus_call_count, cpus_called, 0, 0, 0);

    return (cpu_t) cpus_call_count;
}
static void
mp_broadcast_action(__unused void *null)
{
    /* call action function */
    if (mp_bc_action_func != NULL) {
        mp_bc_action_func(mp_bc_func_arg);
    }

    /* if we're the last one through, wake up the instigator */
    if (atomic_decl_and_test(&mp_bc_count, 1)) {
        thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
    }
}

/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
void
mp_broadcast(
    void (*action_func)(void *),
    void *arg)
{
    if (!smp_initialized) {
        if (action_func != NULL) {
            action_func(arg);
        }
        return;
    }

    /* obtain broadcast lock */
    lck_mtx_lock(&mp_bc_lock);

    /* set static function pointers */
    mp_bc_action_func = action_func;
    mp_bc_func_arg = arg;

    assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

    /*
     * signal other processors, which will call mp_broadcast_action()
     */
    mp_bc_count = real_ncpus;                               /* assume max possible active */
    mp_bc_ncpus = mp_cpus_call(CPUMASK_ALL, NOSYNC, *mp_broadcast_action, NULL);
    atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus);    /* subtract inactive */

    /* call executor function on this cpu */
    mp_broadcast_action(NULL);

    /* block for other cpus to have run action_func */
    if (mp_bc_ncpus > 1) {
        thread_block(THREAD_CONTINUE_NULL);
    } else {
        clear_wait(current_thread(), THREAD_AWAKENED);
    }

    /* release lock */
    lck_mtx_unlock(&mp_bc_lock);
}
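
/*
 * Illustrative sketch (not part of the original file): using mp_broadcast()
 * to run a function on all active CPUs and block until every CPU has run it.
 * 'example_percpu_action' is a hypothetical per-CPU action.
 */
#if 0 /* example only */
static void
example_percpu_action(__unused void *arg)
{
    /* Per-CPU work; runs exactly once on every active CPU. */
    kprintf("CPU%d ran the broadcast action\n", cpu_number());
}

static void
example_broadcast(void)
{
    /* Returns only after all active CPUs have run the action. */
    mp_broadcast(example_percpu_action, NULL);
}
#endif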
void
mp_cpus_kick(cpumask_t cpus)
{
    cpu_t           cpu;
    boolean_t       intrs_enabled = FALSE;

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    mp_safe_spin_lock(&x86_topo_lock);

    for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
        if ((cpu == (cpu_t) cpu_number())
            || ((cpu_to_cpumask(cpu) & cpus) == 0)
            || !cpu_is_running(cpu)) {
            continue;
        }

        lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
    }

    simple_unlock(&x86_topo_lock);
    ml_set_interrupts_enabled(intrs_enabled);
}

void
i386_activate_cpu(void)
{
    cpu_data_t      *cdp = current_cpu_datap();

    assert(!ml_get_interrupts_enabled());

    if (!smp_initialized) {
        cdp->cpu_running = TRUE;
        return;
    }

    mp_safe_spin_lock(&x86_topo_lock);
    cdp->cpu_running = TRUE;
    started_cpu();
    pmap_tlbi_range(0, ~0ULL, true, 0);
    simple_unlock(&x86_topo_lock);
}

void
i386_deactivate_cpu(void)
{
    cpu_data_t      *cdp = current_cpu_datap();

    assert(!ml_get_interrupts_enabled());

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
        0, 0, 0, 0, 0);

    mp_safe_spin_lock(&x86_topo_lock);
    cdp->cpu_running = FALSE;
    simple_unlock(&x86_topo_lock);

    /*
     * Move all of this cpu's timers to the master/boot cpu,
     * and poke it in case there's a sooner deadline for it to schedule.
     */
    timer_queue_shutdown(&cdp->rtclock_timer.queue);
    mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);

#if MONOTONIC
    mt_cpu_down(cdp);
#endif /* MONOTONIC */

    /*
     * Open an interrupt window
     * and ensure any pending IPI or timer is serviced
     */
    mp_disable_preemption();
    ml_set_interrupts_enabled(TRUE);

    while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime) {
        cpu_pause();
    }
    /*
     * Ensure there's no remaining timer deadline set
     * - AICPM may have left one active.
     */
    setPop(0);

    ml_set_interrupts_enabled(FALSE);
    mp_enable_preemption();

    KERNEL_DEBUG_CONSTANT(
        TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
        0, 0, 0, 0, 0);
}

int pmsafe_debug = 1;

#if MACH_KDP
volatile boolean_t      mp_kdp_trap = FALSE;
volatile boolean_t      mp_kdp_is_NMI = FALSE;
volatile unsigned long  mp_kdp_ncpus;
boolean_t               mp_kdp_state;
void
mp_kdp_enter(boolean_t proceed_on_failure)
{
    unsigned int    cpu;
    unsigned int    ncpus = 0;
    unsigned int    my_cpu;
    uint64_t        tsc_timeout;

    DBG("mp_kdp_enter()\n");

    /*
     * Here to enter the debugger.
     * In case of races, only one cpu is allowed to enter kdp after
     * stopping others.
     */
    mp_kdp_state = ml_set_interrupts_enabled(FALSE);
    my_cpu = cpu_number();

    if (my_cpu == (unsigned) debugger_cpu) {
        kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
        kdp_reset();
        return;
    }

    uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
    int locked = 0;
    while (!locked || mp_kdp_trap) {
        if (locked) {
            simple_unlock(&x86_topo_lock);
        }
        if (proceed_on_failure) {
            if (mach_absolute_time() - start_time > 500000000ll) {
                paniclog_append_noflush("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
                break;
            }
            locked = simple_lock_try(&x86_topo_lock, LCK_GRP_NULL);
            if (!locked) {
                cpu_pause();
            }
        } else {
            mp_safe_spin_lock(&x86_topo_lock);
            locked = TRUE;
        }

        if (locked && mp_kdp_trap) {
            simple_unlock(&x86_topo_lock);
            DBG("mp_kdp_enter() race lost\n");
            mp_kdp_wait(TRUE, FALSE);
            locked = FALSE;
        }
    }

    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
    }

    debugger_cpu = my_cpu;
    ncpus = 1;
    atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
    mp_kdp_trap = TRUE;
    debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;

    /*
     * Deliver a nudge to other cpus, counting how many
     */
    DBG("mp_kdp_enter() signaling other processors\n");
    if (force_immediate_debugger_NMI == FALSE) {
        for (cpu = 0; cpu < real_ncpus; cpu++) {
            if (cpu == my_cpu || !cpu_is_running(cpu)) {
                continue;
            }
            ncpus++;
            i386_signal_cpu(cpu, MP_KDP, ASYNC);
        }
        /*
         * Wait for other processors to synchronize
         */
        DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

        /*
         * This timeout is rather arbitrary; we don't want to NMI
         * processors that are executing at potentially
         * "unsafe-to-interrupt" points such as the trampolines,
         * but neither do we want to lose state by waiting too long.
         */
        tsc_timeout = rdtsc64() + (LockTimeOutTSC);

        while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
            /*
             * A TLB shootdown request may be pending--this would
             * result in the requesting processor waiting in
             * PMAP_UPDATE_TLBS() until this processor deals with it.
             * Process it, so it can now enter mp_kdp_wait()
             */
            handle_pending_TLB_flushes();
            cpu_pause();
        }
        /* If we've timed out, and some processor(s) are still unresponsive,
         * interrupt them with an NMI via the local APIC, iff a panic is
         * in progress.
         */
        if (panic_active()) {
            NMIPI_enable(TRUE);
        }
        if (mp_kdp_ncpus != ncpus) {
            unsigned int wait_cycles = 0;
            if (proceed_on_failure) {
                paniclog_append_noflush("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
            } else {
                DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
            }
            for (cpu = 0; cpu < real_ncpus; cpu++) {
                if (cpu == my_cpu || !cpu_is_running(cpu)) {
                    continue;
                }
                if (cpu_signal_pending(cpu, MP_KDP)) {
                    cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
                    cpu_NMI_interrupt(cpu);
                }
            }
            /* Wait again for the same timeout */
            tsc_timeout = rdtsc64() + (LockTimeOutTSC);
            while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
                handle_pending_TLB_flushes();
                cpu_pause();
                ++wait_cycles;
            }
            if (mp_kdp_ncpus != ncpus) {
                paniclog_append_noflush("mp_kdp_enter() NMI pending on cpus:");
                for (cpu = 0; cpu < real_ncpus; cpu++) {
                    if (cpu_is_running(cpu) && !cpu_datap(cpu)->cpu_NMI_acknowledged) {
                        paniclog_append_noflush(" %d", cpu);
                    }
                }
                paniclog_append_noflush("\n");
                if (proceed_on_failure) {
                    paniclog_append_noflush("mp_kdp_enter() timed-out during %s wait after NMI;"
                        "expected %u acks but received %lu after %u loops in %llu ticks\n",
                        (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
                } else {
                    panic("mp_kdp_enter() timed-out during %s wait after NMI;"
                        "expected %u acks but received %lu after %u loops in %llu ticks",
                        (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
                }
            }
        }
    } else {
        for (cpu = 0; cpu < real_ncpus; cpu++) {
            if (cpu == my_cpu || !cpu_is_running(cpu)) {
                continue;
            }
            cpu_NMI_interrupt(cpu);
        }
    }

    if (locked) {
        simple_unlock(&x86_topo_lock);
    }

    DBG("mp_kdp_enter() %d processors done %s\n",
        (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");

    postcode(MP_KDP_ENTER);
}
boolean_t
mp_kdp_all_cpus_halted()
{
    unsigned int ncpus = 0, cpu = 0, my_cpu = 0;

    my_cpu = cpu_number();
    ncpus = 1; /* current CPU */
    for (cpu = 0; cpu < real_ncpus; cpu++) {
        if (cpu == my_cpu || !cpu_is_running(cpu)) {
            continue;
        }
        ncpus++;
    }

    return mp_kdp_ncpus == ncpus;
}

static boolean_t
cpu_signal_pending(int cpu, mp_event_t event)
{
    volatile int    *signals = &cpu_datap(cpu)->cpu_signals;
    boolean_t       retval = FALSE;

    if (i_bit(event, signals)) {
        retval = TRUE;
    }
    return retval;
}

long
kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
    void *arg0, void *arg1)
{
    if (lcpu > (real_ncpus - 1)) {
        return -1;
    }

    kdp_xcpu_call_func.func = func;
    kdp_xcpu_call_func.ret  = -1;
    kdp_xcpu_call_func.arg0 = arg0;
    kdp_xcpu_call_func.arg1 = arg1;
    kdp_xcpu_call_func.cpu  = lcpu;
    DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
    while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE) {
        cpu_pause();
    }
    return kdp_xcpu_call_func.ret;
}
static void
kdp_x86_xcpu_poll(void)
{
    if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
        kdp_xcpu_call_func.ret =
            kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
            kdp_xcpu_call_func.arg1,
            cpu_number());
        kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
    }
}

static void
mp_kdp_wait(boolean_t flush, boolean_t isNMI)
{
    DBG("mp_kdp_wait()\n");

    current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
#if CONFIG_MCA
    /* If we've trapped due to a machine-check, save MCA registers */
    mca_check_save();
#endif

    atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
    while (mp_kdp_trap || (isNMI == TRUE)) {
        /*
         * A TLB shootdown request may be pending--this would result
         * in the requesting processor waiting in PMAP_UPDATE_TLBS()
         * until this processor handles it.
         * Process it, so it can now enter mp_kdp_wait()
         */
        if (flush) {
            handle_pending_TLB_flushes();
        }

        kdp_x86_xcpu_poll();
        cpu_pause();
    }

    atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
    DBG("mp_kdp_wait() done\n");
}

void
mp_kdp_exit(void)
{
    DBG("mp_kdp_exit()\n");
    debugger_cpu = -1;
    atomic_decl((volatile long *)&mp_kdp_ncpus, 1);

    debugger_exit_time = mach_absolute_time();

    mp_kdp_trap = FALSE;

    /* Wait for other processors to stop spinning. XXX needs timeout */
    DBG("mp_kdp_exit() waiting for processors to resume\n");
    while (mp_kdp_ncpus > 0) {
        /*
         * a TLB shootdown request may be pending... this would result in the requesting
         * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
         * Process it, so it can now enter mp_kdp_wait()
         */
        handle_pending_TLB_flushes();
        cpu_pause();
    }

    if (pmsafe_debug && !kdp_snapshot) {
        pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
    }

    debugger_exit_time = mach_absolute_time();

    DBG("mp_kdp_exit() done\n");
    (void) ml_set_interrupts_enabled(mp_kdp_state);
    postcode(MP_KDP_EXIT);
}

#endif /* MACH_KDP */

boolean_t
mp_recent_debugger_activity(void)
{
    uint64_t abstime = mach_absolute_time();
    return ((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
           ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance);
}
void
machine_signal_idle_cancel(
    __unused processor_t processor)
{
    panic("Unimplemented");
}

void
cause_ast_check(
    processor_t     processor)
{
    int     cpu = processor->cpu_id;

    if (cpu != cpu_number()) {
        i386_signal_cpu(cpu, MP_AST, ASYNC);
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
    }
}
void
slave_machine_init(void *param)
{
    /*
     * Here in process context, but with interrupts disabled.
     */
    DBG("slave_machine_init() CPU%d\n", get_cpu_number());

    if (param == FULL_SLAVE_INIT) {
        /*
         * Cold start
         */
        clock_init();
    }
    cpu_machine_init();     /* Interrupts enabled hereafter */
}

#undef cpu_number
int
cpu_number(void)
{
    return get_cpu_number();
}

static void
cpu_prewarm_init()
{
    int i;

    simple_lock_init(&cpu_warm_lock, 0);
    queue_init(&cpu_warm_call_list);
    for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
        enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
    }
}

static timer_call_t
grab_warm_timer_call()
{
    timer_call_t call = NULL;

    simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
    if (!queue_empty(&cpu_warm_call_list)) {
        call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
    }
    simple_unlock(&cpu_warm_lock);

    return call;
}

static void
free_warm_timer_call(timer_call_t call)
{
    simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
    enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
    simple_unlock(&cpu_warm_lock);
}

/*
 * Runs in timer call context (interrupts disabled).
 */
static void
cpu_warm_timer_call_func(
    call_entry_param_t p0,
    __unused call_entry_param_t p1)
{
    free_warm_timer_call((timer_call_t)p0);
    return;
}

/*
 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
 */
static void
_cpu_warm_setup(void *arg)
{
    cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;

    timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
    cwdp->cwd_result = 0;

    return;
}
/*
 * Not safe to call with interrupts disabled.
 */
kern_return_t
ml_interrupt_prewarm(
    uint64_t deadline)
{
    struct cpu_warm_data    cwd;
    timer_call_t            call;
    cpu_t                   ct;

    if (ml_get_interrupts_enabled() == FALSE) {
        panic("%s: Interrupts disabled?\n", __FUNCTION__);
    }

    /*
     * If the platform doesn't need our help, say that we succeeded.
     */
    if (!ml_get_interrupt_prewake_applicable()) {
        return KERN_SUCCESS;
    }

    /*
     * Grab a timer call to use.
     */
    call = grab_warm_timer_call();
    if (call == NULL) {
        return KERN_RESOURCE_SHORTAGE;
    }

    timer_call_setup(call, cpu_warm_timer_call_func, call);
    cwd.cwd_call = call;
    cwd.cwd_deadline = deadline;
    cwd.cwd_result = 0;

    /*
     * For now, non-local interrupts happen on the master processor.
     */
    ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
    if (ct == 0) {
        free_warm_timer_call(call);
        return KERN_FAILURE;
    } else {
        return cwd.cwd_result;
    }
}
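
/*
 * Illustrative sketch (not part of the original file): arming an interrupt
 * prewarm ahead of an anticipated wake deadline. Must be called with
 * interrupts enabled; the one-millisecond deadline here is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_prewarm_in_one_ms(void)
{
    uint64_t interval, deadline;

    nanoseconds_to_absolutetime(1000 * 1000 /* 1 ms */, &interval);
    deadline = mach_absolute_time() + interval;

    /* Returns KERN_SUCCESS immediately if the platform needs no help. */
    return ml_interrupt_prewarm(deadline);
}
#endif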
#if DEBUG || DEVELOPMENT
void
kernel_spin(uint64_t spin_ns)
{
    boolean_t       istate;
    uint64_t        spin_abs;
    uint64_t        deadline;
    cpu_data_t      *cdp;

    kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
    istate = ml_set_interrupts_enabled(FALSE);
    cdp = current_cpu_datap();
    nanoseconds_to_absolutetime(spin_ns, &spin_abs);

    /* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
    cdp->cpu_int_event_time = mach_absolute_time();
    cdp->cpu_int_state = (void *) USER_STATE(current_thread());

    deadline = mach_absolute_time() + spin_ns;
    while (mach_absolute_time() < deadline) {
        cpu_pause();
    }

    cdp->cpu_int_event_time = 0;
    cdp->cpu_int_state = NULL;

    ml_set_interrupts_enabled(istate);
    kprintf("kernel_spin() continuing\n");
}

/*
 * Called from the scheduler's maintenance thread to
 * scan running processors for long-running ISRs and:
 *  - panic if longer than LockTimeOut, or
 *  - log if more than a quantum.
 */
void
mp_interrupt_watchdog(void)
{
    cpu_t                   cpu;
    boolean_t               intrs_enabled = FALSE;
    uint16_t                cpu_int_num;
    uint64_t                cpu_int_event_time;
    uint64_t                cpu_rip;
    uint64_t                cpu_int_duration;
    uint64_t                now;
    x86_saved_state_t       *cpu_int_state;

    if (__improbable(!mp_interrupt_watchdog_enabled)) {
        return;
    }

    intrs_enabled = ml_set_interrupts_enabled(FALSE);
    now = mach_absolute_time();
    /*
     * While timeouts are not suspended,
     * check all other processors for long outstanding interrupt handling.
     */
    for (cpu = 0;
        cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
        cpu++) {
        if ((cpu == (cpu_t) cpu_number()) ||
            (!cpu_is_running(cpu))) {
            continue;
        }
        cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
        if (cpu_int_event_time == 0) {
            continue;
        }
        if (__improbable(now < cpu_int_event_time)) {
            continue;       /* skip due to inter-processor skew */
        }
        cpu_int_state = cpu_datap(cpu)->cpu_int_state;
        if (__improbable(cpu_int_state == NULL)) {
            /* The interrupt may have been dismissed */
            continue;
        }

        /* Here with a cpu handling an interrupt */

        cpu_int_duration = now - cpu_int_event_time;
        if (__improbable(cpu_int_duration > LockTimeOut)) {
            cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
            cpu_rip = saved_state64(cpu_int_state)->isf.rip;
            vector_timed_out = cpu_int_num;
            NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
            panic("Interrupt watchdog, "
                "cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
                cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
            /* NOT REACHED */
        } else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
            mp_interrupt_watchdog_events++;
            cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
            cpu_rip = saved_state64(cpu_int_state)->isf.rip;
            ml_set_interrupts_enabled(intrs_enabled);
            printf("Interrupt watchdog, "
                "cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
                cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
            return;
        }
    }

    ml_set_interrupts_enabled(intrs_enabled);
}
#endif /* DEBUG || DEVELOPMENT */