1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31
32#include <mach_kdp.h>
33#include <kdp/kdp_internal.h>
34#include <mach_ldebug.h>
35
36#include <mach/mach_types.h>
37#include <mach/kern_return.h>
38
39#include <kern/kern_types.h>
40#include <kern/startup.h>
41#include <kern/timer_queue.h>
42#include <kern/processor.h>
43#include <kern/cpu_number.h>
44#include <kern/cpu_data.h>
45#include <kern/assert.h>
46#include <kern/lock_group.h>
47#include <kern/machine.h>
48#include <kern/pms.h>
49#include <kern/misc_protos.h>
50#include <kern/timer_call.h>
51#include <kern/zalloc.h>
52#include <kern/queue.h>
53#include <prng/random.h>
54
55#include <vm/vm_map.h>
56#include <vm/vm_kern.h>
57
58#include <i386/bit_routines.h>
59#include <i386/proc_reg.h>
60#include <i386/cpu_threads.h>
61#include <i386/mp_desc.h>
62#include <i386/misc_protos.h>
63#include <i386/trap.h>
64#include <i386/postcode.h>
65#include <i386/machine_routines.h>
66#include <i386/mp.h>
67#include <i386/mp_events.h>
68#include <i386/lapic.h>
69#include <i386/cpuid.h>
70#include <i386/fpu.h>
71#include <i386/machine_cpu.h>
72#include <i386/pmCPU.h>
73#if CONFIG_MCA
74#include <i386/machine_check.h>
75#endif
76#include <i386/acpi.h>
77
78#include <sys/kdebug.h>
79
80#include <console/serial_protos.h>
81
82#if MONOTONIC
83#include <kern/monotonic.h>
84#endif /* MONOTONIC */
85
86#if MP_DEBUG
87#define PAUSE delay(1000000)
88#define DBG(x...) kprintf(x)
89#else
90#define DBG(x...)
91#define PAUSE
92#endif /* MP_DEBUG */
93
94/* Debugging/test trace events: */
95#define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
96#define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
97#define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
98#define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
99#define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
100#define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
101#define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
102#define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
103
104#define ABS(v) (((v) > 0)?(v):-(v))
105
106void slave_boot_init(void);
107void i386_cpu_IPI(int cpu);
108
109#if MACH_KDP
110static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
111#endif /* MACH_KDP */
112
113#if MACH_KDP
114static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
115#endif /* MACH_KDP */
116static int NMIInterruptHandler(x86_saved_state_t *regs);
117
118boolean_t smp_initialized = FALSE;
119uint32_t TSC_sync_margin = 0xFFF;
120volatile boolean_t force_immediate_debugger_NMI = FALSE;
121volatile boolean_t pmap_tlb_flush_timeout = FALSE;
122#if DEBUG || DEVELOPMENT
123boolean_t mp_interrupt_watchdog_enabled = TRUE;
124uint32_t mp_interrupt_watchdog_events = 0;
125#endif
126
127SIMPLE_LOCK_DECLARE(debugger_callback_lock, 0);
128struct debugger_callback *debugger_callback = NULL;
129
130static LCK_GRP_DECLARE(smp_lck_grp, "i386_smp");
131static LCK_MTX_EARLY_DECLARE(mp_cpu_boot_lock, &smp_lck_grp);
132
133/* Variables needed for MP rendezvous. */
134SIMPLE_LOCK_DECLARE(mp_rv_lock, 0);
135static void (*mp_rv_setup_func)(void *arg);
136static void (*mp_rv_action_func)(void *arg);
137static void (*mp_rv_teardown_func)(void *arg);
138static void *mp_rv_func_arg;
139static volatile int mp_rv_ncpus;
140/* Cache-aligned barriers: */
141static volatile long mp_rv_entry __attribute__((aligned(64)));
142static volatile long mp_rv_exit __attribute__((aligned(64)));
143static volatile long mp_rv_complete __attribute__((aligned(64)));
144
145volatile uint64_t debugger_entry_time;
146volatile uint64_t debugger_exit_time;
147#if MACH_KDP
148#include <kdp/kdp.h>
149extern int kdp_snapshot;
150static struct _kdp_xcpu_call_func {
151 kdp_x86_xcpu_func_t func;
152 void *arg0, *arg1;
153 volatile long ret;
154 volatile uint16_t cpu;
155} kdp_xcpu_call_func = {
156 .cpu = KDP_XCPU_NONE
157};
158
159#endif
160
161/* Variables needed for MP broadcast. */
162static void (*mp_bc_action_func)(void *arg);
163static void *mp_bc_func_arg;
164static int mp_bc_ncpus;
165static volatile long mp_bc_count;
166static LCK_MTX_EARLY_DECLARE(mp_bc_lock, &smp_lck_grp);
167static volatile int debugger_cpu = -1;
168volatile long NMIPI_acks = 0;
169volatile long NMI_count = 0;
170static NMI_reason_t NMI_panic_reason = NONE;
171static int vector_timed_out;
172
173extern void NMI_cpus(void);
174
175static void mp_cpus_call_init(void);
176static void mp_cpus_call_action(void);
177static void mp_call_PM(void);
178
179char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
180
181/* PAL-related routines */
182boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
183 int ipi_vector, i386_intr_func_t ipi_handler);
184void i386_start_cpu(int lapic_id, int cpu_num);
185void i386_send_NMI(int cpu);
186void NMIPI_enable(boolean_t);
187
188#define NUM_CPU_WARM_CALLS 20
189struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
190queue_head_t cpu_warm_call_list;
191decl_simple_lock_data(static, cpu_warm_lock);
192
193typedef struct cpu_warm_data {
194 timer_call_t cwd_call;
195 uint64_t cwd_deadline;
196 int cwd_result;
197} *cpu_warm_data_t;
198
199static void cpu_prewarm_init(void);
200static void cpu_warm_timer_call_func(timer_call_param_t p0, timer_call_param_t p1);
201static void _cpu_warm_setup(void *arg);
202static timer_call_t grab_warm_timer_call(void);
203static void free_warm_timer_call(timer_call_t call);
204
205void
206smp_init(void)
207{
208 console_init();
209
210 if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
211 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler)) {
212 return;
213 }
214
215 cpu_thread_init();
216
217 DBGLOG_CPU_INIT(master_cpu);
218
219 mp_cpus_call_init();
220 mp_cpus_call_cpu_init(master_cpu);
221
222#if DEBUG || DEVELOPMENT
223 if (PE_parse_boot_argn("interrupt_watchdog",
224 &mp_interrupt_watchdog_enabled,
225 sizeof(mp_interrupt_watchdog_enabled))) {
226 kprintf("Interrupt watchdog %sabled\n",
227 mp_interrupt_watchdog_enabled ? "en" : "dis");
228 }
229#endif
230
231 if (PE_parse_boot_argn("TSC_sync_margin",
232 &TSC_sync_margin, sizeof(TSC_sync_margin))) {
233 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
234 } else if (cpuid_vmm_present()) {
235 kprintf("TSC sync margin disabled\n");
236 TSC_sync_margin = 0;
237 }
238 smp_initialized = TRUE;
239
240 cpu_prewarm_init();
241
242 return;
243}
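/*
 * Illustrative example (hypothetical values): the two boot-args parsed above
 * can be set on a DEBUG/DEVELOPMENT host before reboot, e.g.
 *
 *	sudo nvram boot-args="TSC_sync_margin=0x1000 interrupt_watchdog=0"
 *
 * TSC_sync_margin=0 disables TSC sync checking entirely, matching the default
 * chosen above when a VMM is detected.
 */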
244
245typedef struct {
246 int target_cpu;
247 int target_lapic;
248 int starter_cpu;
249} processor_start_info_t;
250static processor_start_info_t start_info __attribute__((aligned(64)));
251
252/*
253 * Cache-alignment is to avoid cross-cpu false-sharing interference.
254 */
255static volatile long tsc_entry_barrier __attribute__((aligned(64)));
256static volatile long tsc_exit_barrier __attribute__((aligned(64)));
257static volatile uint64_t tsc_target __attribute__((aligned(64)));
258
259/*
260 * Poll a CPU to see when it has marked itself as running.
261 */
262static void
263mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
264{
265 while (iters-- > 0) {
266 if (cpu_datap(slot_num)->cpu_running) {
267 break;
268 }
269 delay(usecdelay);
270 }
271}
272
273/*
274 * Quickly bring a CPU back online which has been halted.
275 */
276kern_return_t
277intel_startCPU_fast(int slot_num)
278{
279 kern_return_t rc;
280
281 /*
282 * Try to perform a fast restart
283 */
284 rc = pmCPUExitHalt(slot_num);
285 if (rc != KERN_SUCCESS) {
286 /*
287 * The CPU was not eligible for a fast restart.
288 */
289 return rc;
290 }
291
292 KERNEL_DEBUG_CONSTANT(
293 TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
294 slot_num, 0, 0, 0, 0);
295
296 /*
297 * Wait until the CPU is back online.
298 */
299 mp_disable_preemption();
300
301 /*
302 * We use short pauses (1us) for low latency. 30,000 iterations take
303 * longer than a full restart would require, so this should be more
304 * than long enough.
305 */
306
307 mp_wait_for_cpu_up(slot_num, 30000, 1);
308 mp_enable_preemption();
309
310 KERNEL_DEBUG_CONSTANT(
311 TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
312 slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
313
314 /*
315 * Check to make sure that the CPU is really running. If not,
316 * go through the slow path.
317 */
318 if (cpu_datap(slot_num)->cpu_running) {
319 return KERN_SUCCESS;
320 } else {
321 return KERN_FAILURE;
322 }
323}
324
325static void
326started_cpu(void)
327{
328 /* Here on the started cpu with cpu_running set TRUE */
329
330 if (TSC_sync_margin &&
331 start_info.target_cpu == cpu_number()) {
332 /*
333 * I've just started up; synchronize again with the starter cpu
334 * and then snap my TSC.
335 */
336 tsc_target = 0;
337 atomic_decl(&tsc_entry_barrier, 1);
338 while (tsc_entry_barrier != 0) {
339 ; /* spin for starter and target at barrier */
340 }
341 tsc_target = rdtsc64();
342 atomic_decl(&tsc_exit_barrier, 1);
343 }
344}
345
346static void
347start_cpu(void *arg)
348{
349 int i = 1000;
350 processor_start_info_t *psip = (processor_start_info_t *) arg;
351
352 /* Ignore this if the current processor is not the starter */
353 if (cpu_number() != psip->starter_cpu) {
354 return;
355 }
356
357 DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
358 arg, psip->target_cpu, psip->target_lapic);
359
360 KERNEL_DEBUG_CONSTANT(
361 TRACE_MP_CPU_START | DBG_FUNC_START,
362 psip->target_cpu,
363 psip->target_lapic, 0, 0, 0);
364
365 i386_start_cpu(psip->target_lapic, psip->target_cpu);
366
367#ifdef POSTCODE_DELAY
368 /* Wait much longer if postcodes are displayed for a delay period. */
369 i *= 10000;
370#endif
371 DBG("start_cpu(%p) about to wait for cpu %d\n",
372 arg, psip->target_cpu);
373
374 mp_wait_for_cpu_up(psip->target_cpu, i * 100, 100);
375
376 KERNEL_DEBUG_CONSTANT(
377 TRACE_MP_CPU_START | DBG_FUNC_END,
378 psip->target_cpu,
379 cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
380
381 if (TSC_sync_margin &&
382 cpu_datap(psip->target_cpu)->cpu_running) {
383 /*
384 * Compare the TSC from the started processor with ours.
385 * Report and log/panic if it diverges by more than
386 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
387 * can be overridden by boot-arg (with 0 meaning no checking).
388 */
389 uint64_t tsc_starter;
390 int64_t tsc_delta;
391 atomic_decl(&tsc_entry_barrier, 1);
392 while (tsc_entry_barrier != 0) {
393 ; /* spin for both processors at barrier */
394 }
395 tsc_starter = rdtsc64();
396 atomic_decl(&tsc_exit_barrier, 1);
397 while (tsc_exit_barrier != 0) {
398 ; /* spin for target to store its TSC */
399 }
400 tsc_delta = tsc_target - tsc_starter;
401 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
402 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
403#if DEBUG || DEVELOPMENT
404 /*
405 * Stash the delta for inspection later, since we can no
406 * longer print/log it with interrupts disabled.
407 */
408 cpu_datap(psip->target_cpu)->tsc_sync_delta = tsc_delta;
409#endif
410 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
411#if DEBUG
412 panic(
413#else
414 kprintf(
415#endif
416 "Unsynchronized TSC for cpu %d: "
417 "0x%016llx, delta 0x%llx\n",
418 psip->target_cpu, tsc_target, tsc_delta);
419 }
420 }
421}
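/*
 * A minimal sketch of the TSC-sync handshake above, assuming the barriers
 * were primed to 2 by intel_startCPU():
 *
 *	starter (start_cpu)                  target (started_cpu)
 *	atomic_decl(&tsc_entry_barrier, 1)   atomic_decl(&tsc_entry_barrier, 1)
 *	spin until tsc_entry_barrier == 0    spin until tsc_entry_barrier == 0
 *	tsc_starter = rdtsc64()              tsc_target = rdtsc64()
 *	atomic_decl(&tsc_exit_barrier, 1)    atomic_decl(&tsc_exit_barrier, 1)
 *	spin until tsc_exit_barrier == 0     done
 *	tsc_delta = tsc_target - tsc_starter
 *
 * Both sides read their TSCs as nearly simultaneously as the entry barrier
 * allows, so |tsc_delta| approximates the inter-cpu TSC skew that is compared
 * against TSC_sync_margin.
 */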
422
423kern_return_t
424intel_startCPU(
425 int slot_num)
426{
427 int lapic = cpu_to_lapic[slot_num];
428 boolean_t istate;
429
430 assert(lapic != -1);
431
432 DBGLOG_CPU_INIT(slot_num);
433
434 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
435 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
436
437 /*
438 * Initialize (or re-initialize) the descriptor tables for this cpu.
439 * Propagate processor mode to slave.
440 */
441 cpu_desc_init(cpu_datap(slot_num));
442
443 /* Serialize use of the slave boot stack, etc. */
444 lck_mtx_lock(&mp_cpu_boot_lock);
445
446 istate = ml_set_interrupts_enabled(FALSE);
447 if (slot_num == get_cpu_number()) {
448 ml_set_interrupts_enabled(istate);
449 lck_mtx_unlock(&mp_cpu_boot_lock);
450 return KERN_SUCCESS;
451 }
452
453 start_info.starter_cpu = cpu_number();
454 start_info.target_cpu = slot_num;
455 start_info.target_lapic = lapic;
456 tsc_entry_barrier = 2;
457 tsc_exit_barrier = 2;
458
459 /*
460 * Perform the processor startup sequence with all running
461 * processors rendezvous'ed. This is required during periods when
462 * the cache-disable bit is set for MTRR/PAT initialization.
463 */
464 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
465
466 start_info.target_cpu = 0;
467
468 ml_set_interrupts_enabled(istate);
469 lck_mtx_unlock(&mp_cpu_boot_lock);
470
471 if (!cpu_datap(slot_num)->cpu_running) {
472 kprintf("Failed to start CPU %02d\n", slot_num);
473 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
474 delay(1000000);
475 halt_cpu();
476 return KERN_SUCCESS;
477 } else {
478 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
479 return KERN_SUCCESS;
480 }
481}
482
483#if MP_DEBUG
484cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
485cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
486
487MP_EVENT_NAME_DECL();
488
489#endif /* MP_DEBUG */
490
491/*
492 * Note: called with NULL state when polling for TLB flush and cross-calls.
493 */
494int
495cpu_signal_handler(x86_saved_state_t *regs)
496{
497#if !MACH_KDP
498#pragma unused (regs)
499#endif /* !MACH_KDP */
500 int my_cpu;
501 volatile int *my_word;
502
503 SCHED_STATS_INC(ipi_count);
504
505 my_cpu = cpu_number();
506 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
507 /* Store the initial set of signals for diagnostics. New
508 * signals could arrive while these are being processed,
509 * so it's no more than a hint.
510 */
511
512 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
513
514 do {
515#if MACH_KDP
516 if (i_bit(MP_KDP, my_word)) {
517 DBGLOG(cpu_handle, my_cpu, MP_KDP);
518 i_bit_clear(MP_KDP, my_word);
519/* Ensure that the i386_kernel_state at the base of the
520 * current thread's stack (if any) is synchronized with the
521 * context at the moment of the interrupt, to facilitate
522 * access through the debugger.
523 */
524 sync_iss_to_iks(regs);
525 if (pmsafe_debug && !kdp_snapshot) {
526 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
527 }
528 mp_kdp_wait(TRUE, FALSE);
529 if (pmsafe_debug && !kdp_snapshot) {
530 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
531 }
532 } else
533#endif /* MACH_KDP */
534 if (i_bit(MP_TLB_FLUSH, my_word)) {
535 DBGLOG(cpu_handle, my_cpu, MP_TLB_FLUSH);
536 i_bit_clear(MP_TLB_FLUSH, my_word);
537 pmap_update_interrupt();
538 } else if (i_bit(MP_CALL, my_word)) {
539 DBGLOG(cpu_handle, my_cpu, MP_CALL);
540 i_bit_clear(MP_CALL, my_word);
541 mp_cpus_call_action();
542 } else if (i_bit(MP_CALL_PM, my_word)) {
543 DBGLOG(cpu_handle, my_cpu, MP_CALL_PM);
544 i_bit_clear(MP_CALL_PM, my_word);
545 mp_call_PM();
546 }
547 if (regs == NULL) {
548 /* Called to poll only for cross-calls and TLB flush */
549 break;
550 } else if (i_bit(MP_AST, my_word)) {
551 DBGLOG(cpu_handle, my_cpu, MP_AST);
552 i_bit_clear(MP_AST, my_word);
553 ast_check(cpu_to_processor(my_cpu));
554 }
555 } while (*my_word);
556
557 return 0;
558}
559
560extern void kprintf_break_lock(void);
561int
562NMIInterruptHandler(x86_saved_state_t *regs)
563{
564 void *stackptr;
565 char pstr[256];
566 uint64_t now = mach_absolute_time();
567
568 if (panic_active() && !panicDebugging) {
569 if (pmsafe_debug) {
570 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
571 }
572 for (;;) {
573 cpu_pause();
574 }
575 }
576
577 atomic_incl(&NMIPI_acks, 1);
578 atomic_incl(&NMI_count, 1);
579 sync_iss_to_iks_unconditionally(regs);
580 __asm__ volatile ("movq %%rbp, %0" : "=m" (stackptr));
581
582 if (cpu_number() == debugger_cpu) {
583 goto NMExit;
584 }
585
586 if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
587 snprintf(&pstr[0], sizeof(pstr),
588 "Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
589 cpu_number(), now, spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
590 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
591 } else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
592 snprintf(&pstr[0], sizeof(pstr),
593 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
594 cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
595 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
596 } else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
597 snprintf(&pstr[0], sizeof(pstr),
598 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
599 cpu_number(), now);
600 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
601 } else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
602 snprintf(&pstr[0], sizeof(pstr),
603 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
604 cpu_number(), now, vector_timed_out);
605 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
606 }
607
608#if MACH_KDP
609 if (pmsafe_debug && !kdp_snapshot) {
610 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
611 }
612 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
613 i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
614 if (panic_active() || NMI_panic_reason != NONE) {
615 mp_kdp_wait(FALSE, TRUE);
616 } else if (!mp_kdp_trap &&
617 !mp_kdp_is_NMI &&
618 virtualized && (debug_boot_arg & DB_NMI)) {
619 /*
620 * Under a VMM with the debug boot-arg set, drop into kdp.
621 * Since an NMI is involved, there's a risk of contending with
622 * a panic. And side-effects of NMIs may result in entry into,
623 * and continuing from, the debugger being unreliable.
624 */
625 if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
626 kprintf_break_lock();
627 kprintf("Debugger entry requested by NMI\n");
628 kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
629 printf("Debugger entry requested by NMI\n");
630 mp_kdp_is_NMI = FALSE;
631 } else {
632 mp_kdp_wait(FALSE, FALSE);
633 }
634 } else {
635 mp_kdp_wait(FALSE, FALSE);
636 }
637 if (pmsafe_debug && !kdp_snapshot) {
638 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
639 }
640#endif
641NMExit:
642 return 1;
643}
644
645
646/*
647 * cpu_interrupt is really just to be used by the scheduler to
648 * get a CPU's attention; it may not always issue an IPI. If an
649 * IPI is always needed, use i386_cpu_IPI() instead.
650 */
651void
652cpu_interrupt(int cpu)
653{
654 boolean_t did_IPI = FALSE;
655
656 if (smp_initialized
657 && pmCPUExitIdle(cpu_datap(cpu))) {
658 i386_cpu_IPI(cpu);
659 did_IPI = TRUE;
660 }
661
662 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
663}
664
665/*
666 * Send a true NMI via the local APIC to the specified CPU.
667 */
668void
669cpu_NMI_interrupt(int cpu)
670{
671 if (smp_initialized) {
672 i386_send_NMI(cpu);
673 }
674}
675
676void
677NMI_cpus(void)
678{
679 unsigned int cpu;
680 boolean_t intrs_enabled;
681 uint64_t tsc_timeout;
682
683 intrs_enabled = ml_set_interrupts_enabled(FALSE);
684 NMIPI_enable(TRUE);
685 for (cpu = 0; cpu < real_ncpus; cpu++) {
686 if (!cpu_is_running(cpu)) {
687 continue;
688 }
689 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
690 cpu_NMI_interrupt(cpu);
691 tsc_timeout = !machine_timeout_suspended() ?
692 rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
693 ~0ULL;
694 while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
695 handle_pending_TLB_flushes();
696 cpu_pause();
697 if (rdtsc64() > tsc_timeout) {
698 panic("NMI_cpus() timeout cpu %d", cpu);
699 }
700 }
701 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
702 }
703 NMIPI_enable(FALSE);
704
705 ml_set_interrupts_enabled(intrs_enabled);
706}
707
708static void(*volatile mp_PM_func)(void) = NULL;
709
710static void
711mp_call_PM(void)
712{
713 assert(!ml_get_interrupts_enabled());
714
715 if (mp_PM_func != NULL) {
716 mp_PM_func();
717 }
718}
719
720void
721cpu_PM_interrupt(int cpu)
722{
723 assert(!ml_get_interrupts_enabled());
724
725 if (mp_PM_func != NULL) {
726 if (cpu == cpu_number()) {
727 mp_PM_func();
728 } else {
729 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
730 }
731 }
732}
733
734void
735PM_interrupt_register(void (*fn)(void))
736{
737 mp_PM_func = fn;
738}
739
740void
741i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
742{
743 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
744 uint64_t tsc_timeout;
745
746
747 if (!cpu_datap(cpu)->cpu_running) {
748 return;
749 }
750
751 if (event == MP_TLB_FLUSH) {
752 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
753 }
754
755 DBGLOG(cpu_signal, cpu, event);
756
757 i_bit_set(event, signals);
758 i386_cpu_IPI(cpu);
759 if (mode == SYNC) {
760again:
761 tsc_timeout = !machine_timeout_suspended() ?
762 rdtsc64() + (1000 * 1000 * 1000) :
763 ~0ULL;
764 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
765 cpu_pause();
766 }
767 if (i_bit(event, signals)) {
768 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
769 cpu, event);
770 goto again;
771 }
772 }
773 if (event == MP_TLB_FLUSH) {
774 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
775 }
776}
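/*
 * Usage sketch (hypothetical target_cpu): posting a cross-cpu signal with the
 * routine above, synchronously versus fire-and-forget:
 *
 *	i386_signal_cpu(target_cpu, MP_TLB_FLUSH, SYNC);  // spin until the target
 *	                                                  // clears the event bit
 *	i386_signal_cpu(target_cpu, MP_AST, ASYNC);       // just post the IPI
 *
 * In SYNC mode the wait re-arms its TSC-based timeout (roughly a second's
 * worth of TSC ticks) and retries indefinitely, so a wedged target can stall
 * the caller.
 */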
777
778/*
779 * Helper function called when busy-waiting: returns TRUE if too long
780 * a TSC-based interval has elapsed since the start of the spin.
781 */
782static boolean_t
783mp_spin_timeout(uint64_t tsc_start)
784{
785 uint64_t tsc_timeout;
786
787 cpu_pause();
788 if (machine_timeout_suspended()) {
789 return FALSE;
790 }
791
792 /*
793 * The timeout is 4 * the spinlock timeout period
794 * unless we have serial console printing (kprintf) enabled
795 * in which case we allow an even greater margin.
796 */
797 tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
798 : LockTimeOutTSC << 4;
799 return rdtsc64() > tsc_start + tsc_timeout;
800}
801
802/*
803 * Helper function to take a spinlock while ensuring that incoming IPIs
804 * are still serviced if interrupts are masked while we spin.
805 * Returns current interrupt state.
806 */
807boolean_t
808mp_safe_spin_lock(usimple_lock_t lock)
809{
810 if (ml_get_interrupts_enabled()) {
811 simple_lock(lock, LCK_GRP_NULL);
812 return TRUE;
813 } else {
814 uint64_t tsc_spin_start = rdtsc64();
815 while (!simple_lock_try(lock, LCK_GRP_NULL)) {
816 cpu_signal_handler(NULL);
817 if (mp_spin_timeout(tsc_spin_start)) {
818 uint32_t lock_cpu;
819 uintptr_t lowner = (uintptr_t)
820 lock->interlock.lock_data;
821 spinlock_timed_out = lock;
822 lock_cpu = spinlock_timeout_NMI(lowner);
823 NMIPI_panic(cpu_to_cpumask(lock_cpu), SPINLOCK_TIMEOUT);
824 panic("mp_safe_spin_lock() timed out, lock: %p, owner thread: 0x%lx, current_thread: %p, owner on CPU 0x%x, time: %llu",
825 lock, lowner, current_thread(), lock_cpu, mach_absolute_time());
826 }
827 }
828 return FALSE;
829 }
830}
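/*
 * Typical usage sketch (hypothetical lock): the return value records whether
 * interrupts were enabled at the time the lock was taken; unlocking is the
 * ordinary simple_unlock():
 *
 *	boolean_t intrs = mp_safe_spin_lock(&some_lock);
 *	// ... critical section; if intrs == FALSE, pending IPIs were polled
 *	// via cpu_signal_handler(NULL) while we spun ...
 *	simple_unlock(&some_lock);
 *
 * Unlike a bare simple_lock(), a spin timeout here NMIs the owner's cpu and
 * panics instead of hanging silently.
 */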
831
832/*
833 * All-CPU rendezvous:
834 * - CPUs are signalled,
835 * - all execute the setup function (if specified),
836 * - rendezvous (i.e. all cpus reach a barrier),
837 * - all execute the action function (if specified),
838 * - rendezvous again,
839 * - execute the teardown function (if specified), and then
840 * - resume.
841 *
842 * Note that the supplied external functions _must_ be reentrant and aware
843 * that they are running in parallel and in an unknown lock context.
844 */
845
846static void
847mp_rendezvous_action(__unused void *null)
848{
849 boolean_t intrs_enabled;
850 uint64_t tsc_spin_start;
851
852 /*
853 * Note that mp_rv_lock was acquired by the thread that initiated the
854 * rendezvous and must have been acquired before we enter
855 * mp_rendezvous_action().
856 */
857 current_cpu_datap()->cpu_rendezvous_in_progress = TRUE;
858
859 /* setup function */
860 if (mp_rv_setup_func != NULL) {
861 mp_rv_setup_func(mp_rv_func_arg);
862 }
863
864 intrs_enabled = ml_get_interrupts_enabled();
865
866 /* spin on entry rendezvous */
867 atomic_incl(&mp_rv_entry, 1);
868 tsc_spin_start = rdtsc64();
869
870 while (mp_rv_entry < mp_rv_ncpus) {
871 /* poll for pesky tlb flushes if interrupts disabled */
872 if (!intrs_enabled) {
873 handle_pending_TLB_flushes();
874 }
875 if (mp_spin_timeout(tsc_spin_start)) {
876 panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
877 }
878 }
879
880 /* action function */
881 if (mp_rv_action_func != NULL) {
882 mp_rv_action_func(mp_rv_func_arg);
883 }
884
885 /* spin on exit rendezvous */
886 atomic_incl(&mp_rv_exit, 1);
887 tsc_spin_start = rdtsc64();
888 while (mp_rv_exit < mp_rv_ncpus) {
889 if (!intrs_enabled) {
890 handle_pending_TLB_flushes();
891 }
892 if (mp_spin_timeout(tsc_spin_start)) {
893 panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
894 }
895 }
896
897 /* teardown function */
898 if (mp_rv_teardown_func != NULL) {
899 mp_rv_teardown_func(mp_rv_func_arg);
900 }
901
902 current_cpu_datap()->cpu_rendezvous_in_progress = FALSE;
903
904 /* Bump completion count */
905 atomic_incl(&mp_rv_complete, 1);
906}
907
908void
909mp_rendezvous(void (*setup_func)(void *),
910 void (*action_func)(void *),
911 void (*teardown_func)(void *),
912 void *arg)
913{
914 uint64_t tsc_spin_start;
915
916 if (!smp_initialized) {
917 if (setup_func != NULL) {
918 setup_func(arg);
919 }
920 if (action_func != NULL) {
921 action_func(arg);
922 }
923 if (teardown_func != NULL) {
924 teardown_func(arg);
925 }
926 return;
927 }
928
929 /* obtain rendezvous lock */
930 mp_rendezvous_lock();
931
932 /* set static function pointers */
933 mp_rv_setup_func = setup_func;
934 mp_rv_action_func = action_func;
935 mp_rv_teardown_func = teardown_func;
936 mp_rv_func_arg = arg;
937
938 mp_rv_entry = 0;
939 mp_rv_exit = 0;
940 mp_rv_complete = 0;
941
942 /*
943 * signal other processors, which will call mp_rendezvous_action()
944 * with interrupts disabled
945 */
946 mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;
947
948 /* call executor function on this cpu */
949 mp_rendezvous_action(NULL);
950
951 /*
952 * Spin for everyone to complete.
953 * This is necessary to ensure that all processors have proceeded
954 * from the exit barrier before we release the rendezvous structure.
955 */
956 tsc_spin_start = rdtsc64();
957 while (mp_rv_complete < mp_rv_ncpus) {
958 if (mp_spin_timeout(tsc_spin_start)) {
959 panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
960 }
961 }
962
963 /* Tidy up */
964 mp_rv_setup_func = NULL;
965 mp_rv_action_func = NULL;
966 mp_rv_teardown_func = NULL;
967 mp_rv_func_arg = NULL;
968
969 /* release lock */
970 mp_rendezvous_unlock();
971}
972
973void
974mp_rendezvous_lock(void)
975{
976 (void) mp_safe_spin_lock(&mp_rv_lock);
977}
978
979void
980mp_rendezvous_unlock(void)
981{
982 simple_unlock(&mp_rv_lock);
983}
984
985void
986mp_rendezvous_break_lock(void)
987{
988 simple_lock_init(&mp_rv_lock, 0);
989}
990
991static void
992setup_disable_intrs(__unused void * param_not_used)
993{
994 /* disable interrupts before the first barrier */
995 boolean_t intr = ml_set_interrupts_enabled(FALSE);
996
997 current_cpu_datap()->cpu_iflag = intr;
998 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
999}
1000
1001static void
1002teardown_restore_intrs(__unused void * param_not_used)
1003{
1004 /* restore interrupt flag following MTRR changes */
1005 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
1006 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
1007}
1008
1009/*
1010 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
1011 * This is exported for use by kexts.
1012 */
1013void
1014mp_rendezvous_no_intrs(
1015 void (*action_func)(void *),
1016 void *arg)
1017{
1018 mp_rendezvous(setup_disable_intrs,
1019 action_func,
1020 teardown_restore_intrs,
1021 arg);
1022}
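/*
 * Example usage sketch (hypothetical action routine and MSR), in the style of
 * the MTRR/PAT update path this wrapper exists for: every online cpu runs the
 * action between the two rendezvous barriers with interrupts disabled.
 *
 *	static void
 *	update_percpu_msr(void *arg)
 *	{
 *		uint64_t value = *(uint64_t *) arg;
 *		wrmsr64(HYPOTHETICAL_MSR, value);	// placeholder MSR number
 *	}
 *	...
 *	uint64_t new_value = compute_new_value();	// hypothetical helper
 *	mp_rendezvous_no_intrs(update_percpu_msr, &new_value);
 *
 * The action routine must be reentrant, must not block, and must not take
 * locks that another rendezvousing cpu could be holding.
 */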
1023
1024
1025typedef struct {
1026 queue_chain_t link; /* queue linkage */
1027 void (*func)(void *, void *); /* routine to call */
1028 void *arg0; /* routine's 1st arg */
1029 void *arg1; /* routine's 2nd arg */
1030 cpumask_t *maskp; /* completion response mask */
1031} mp_call_t;
1032
1033
1034typedef struct {
1035 queue_head_t queue;
1036 decl_simple_lock_data(, lock);
1037} mp_call_queue_t;
1038#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
1039static mp_call_queue_t mp_cpus_call_freelist;
1040static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
1041
1042static inline boolean_t
1043mp_call_head_lock(mp_call_queue_t *cqp)
1044{
1045 boolean_t intrs_enabled;
1046
1047 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1048 simple_lock(&cqp->lock, LCK_GRP_NULL);
1049
1050 return intrs_enabled;
1051}
1052
1053/*
1054 * Deliver an NMIPI to a set of processors to cause them to panic.
1055 */
1056void
1057NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why)
1058{
1059 unsigned int cpu;
1060 cpumask_t cpu_bit;
1061 uint64_t deadline;
1062
1063 NMIPI_enable(TRUE);
1064 NMI_panic_reason = why;
1065
1066 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1067 if ((cpu_mask & cpu_bit) == 0) {
1068 continue;
1069 }
1070 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
1071 cpu_NMI_interrupt(cpu);
1072 }
1073
1074 /* Wait (only so long) for NMI'ed cpus to respond */
1075 deadline = mach_absolute_time() + LockTimeOut;
1076 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1077 if ((cpu_mask & cpu_bit) == 0) {
1078 continue;
1079 }
1080 while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
1081 mach_absolute_time() < deadline) {
1082 cpu_pause();
1083 }
1084 }
1085}
1086
1087#if MACH_ASSERT
1088static inline boolean_t
1089mp_call_head_is_locked(mp_call_queue_t *cqp)
1090{
1091 return !ml_get_interrupts_enabled() &&
1092 hw_lock_held((hw_lock_t)&cqp->lock);
1093}
1094#endif
1095
1096static inline void
1097mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
1098{
1099 simple_unlock(&cqp->lock);
1100 ml_set_interrupts_enabled(intrs_enabled);
1101}
1102
1103static inline mp_call_t *
1104mp_call_alloc(void)
1105{
1106 mp_call_t *callp = NULL;
1107 boolean_t intrs_enabled;
1108 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1109
1110 intrs_enabled = mp_call_head_lock(cqp);
1111 if (!queue_empty(&cqp->queue)) {
1112 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1113 }
1114 mp_call_head_unlock(cqp, intrs_enabled);
1115
1116 return callp;
1117}
1118
1119static inline void
1120mp_call_free(mp_call_t *callp)
1121{
1122 boolean_t intrs_enabled;
1123 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1124
1125 intrs_enabled = mp_call_head_lock(cqp);
1126 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
1127 mp_call_head_unlock(cqp, intrs_enabled);
1128}
1129
1130static inline mp_call_t *
1131mp_call_dequeue_locked(mp_call_queue_t *cqp)
1132{
1133 mp_call_t *callp = NULL;
1134
1135 assert(mp_call_head_is_locked(cqp));
1136 if (!queue_empty(&cqp->queue)) {
1137 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1138 }
1139 return callp;
1140}
1141
1142static inline void
1143mp_call_enqueue_locked(
1144 mp_call_queue_t *cqp,
1145 mp_call_t *callp)
1146{
1147 queue_enter(&cqp->queue, callp, typeof(callp), link);
1148}
1149
1150/* Called on the boot processor to initialize global structures */
1151static void
1152mp_cpus_call_init(void)
1153{
1154 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1155
1156 DBG("mp_cpus_call_init()\n");
1157 simple_lock_init(&cqp->lock, 0);
1158 queue_init(&cqp->queue);
1159}
1160
1161/*
1162 * Called at processor registration to add call buffers to the free list
1163 * and to initialize the per-cpu call queue.
1164 */
1165void
1166mp_cpus_call_cpu_init(int cpu)
1167{
1168 int i;
1169 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1170 mp_call_t *callp;
1171
1172 simple_lock_init(&cqp->lock, 0);
1173 queue_init(&cqp->queue);
1174 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1175 callp = zalloc_permanent_type(mp_call_t);
1176 mp_call_free(callp);
1177 }
1178
1179 DBG("mp_cpus_call_init(%d) done\n", cpu);
1180}
1181
1182/*
1183 * This is called from cpu_signal_handler() to process an MP_CALL signal.
1184 * It is also called from i386_deactivate_cpu() when a cpu is being taken offline.
1185 */
1186static void
1187mp_cpus_call_action(void)
1188{
1189 mp_call_queue_t *cqp;
1190 boolean_t intrs_enabled;
1191 mp_call_t *callp;
1192 mp_call_t call;
1193
1194 assert(!ml_get_interrupts_enabled());
1195 cqp = &mp_cpus_call_head[cpu_number()];
1196 intrs_enabled = mp_call_head_lock(cqp);
1197 while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
1198 /* Copy call request to the stack to free buffer */
1199 call = *callp;
1200 mp_call_free(callp);
1201 if (call.func != NULL) {
1202 mp_call_head_unlock(cqp, intrs_enabled);
1203 KERNEL_DEBUG_CONSTANT(
1204 TRACE_MP_CPUS_CALL_ACTION,
1205 VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
1206 VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
1207 call.func(call.arg0, call.arg1);
1208 (void) mp_call_head_lock(cqp);
1209 }
1210 if (call.maskp != NULL) {
1211 i_bit_set(cpu_number(), call.maskp);
1212 }
1213 }
1214 mp_call_head_unlock(cqp, intrs_enabled);
1215}
1216
1217/*
1218 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1219 * Possible modes are:
1220 * SYNC: function is called serially on target cpus in logical cpu order
1221 * waiting for each call to be acknowledged before proceeding
1222 * ASYNC: function call is queued to the specified cpus
1223 * waiting for all calls to complete in parallel before returning
1224 * NOSYNC: function calls are queued
1225 * but we return before confirmation of calls completing.
1226 * The action function may be NULL.
1227 * The cpu mask may include the local cpu. Offline cpus are ignored.
1228 * The return value is the number of cpus on which the call was made or queued.
1229 */
1230cpu_t
1231mp_cpus_call(
1232 cpumask_t cpus,
1233 mp_sync_t mode,
1234 void (*action_func)(void *),
1235 void *arg)
1236{
1237 return mp_cpus_call1(
1238 cpus,
1239 mode,
1240 (void (*)(void *, void *))action_func,
1241 arg,
1242 NULL,
1243 NULL);
1244}
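/*
 * Usage sketch (hypothetical action routine) for the three modes documented
 * above:
 *
 *	static void
 *	poke_cpu(void *arg)
 *	{
 *		atomic_incl((volatile long *) arg, 1);	// on remote cpus this runs
 *							// at interrupt context
 *	}
 *	...
 *	volatile long hits = 0;
 *	// Queue to all online cpus (including this one) and wait for completion:
 *	cpu_t n = mp_cpus_call(CPUMASK_ALL, ASYNC, poke_cpu, (void *) &hits);
 *	// Call the other cpus one at a time, waiting for each acknowledgement:
 *	(void) mp_cpus_call(CPUMASK_OTHERS, SYNC, poke_cpu, (void *) &hits);
 *	// Queue to the master cpu and return without waiting:
 *	(void) mp_cpus_call(cpu_to_cpumask(master_cpu), NOSYNC, poke_cpu, (void *) &hits);
 *
 * The return value n is the number of cpus on which the call was made or
 * queued; offline cpus are silently skipped.
 */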
1245
1246static void
1247mp_cpus_call_wait(boolean_t intrs_enabled,
1248 cpumask_t cpus_called,
1249 cpumask_t *cpus_responded)
1250{
1251 mp_call_queue_t *cqp;
1252 uint64_t tsc_spin_start;
1253
1254 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
1255 cqp = &mp_cpus_call_head[cpu_number()];
1256
1257 tsc_spin_start = rdtsc64();
1258 while (*cpus_responded != cpus_called) {
1259 if (!intrs_enabled) {
1260 /* Sniffing w/o locking */
1261 if (!queue_empty(&cqp->queue)) {
1262 mp_cpus_call_action();
1263 }
1264 cpu_signal_handler(NULL);
1265 }
1266 if (mp_spin_timeout(tsc_spin_start)) {
1267 cpumask_t cpus_unresponsive;
1268
1269 cpus_unresponsive = cpus_called & ~(*cpus_responded);
1270 NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
1271 panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
1272 cpus_unresponsive);
1273 }
1274 }
1275}
1276
1277cpu_t
1278mp_cpus_call1(
1279 cpumask_t cpus,
1280 mp_sync_t mode,
1281 void (*action_func)(void *, void *),
1282 void *arg0,
1283 void *arg1,
1284 cpumask_t *cpus_calledp)
1285{
1286 cpu_t cpu = 0;
1287 boolean_t intrs_enabled = FALSE;
1288 boolean_t call_self = FALSE;
1289 cpumask_t cpus_called = 0;
1290 cpumask_t cpus_responded = 0;
1291 long cpus_call_count = 0;
1292 uint64_t tsc_spin_start;
1293 boolean_t topo_lock;
1294
1295 KERNEL_DEBUG_CONSTANT(
1296 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
1297 cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
1298
1299 if (!smp_initialized) {
1300 if ((cpus & CPUMASK_SELF) == 0) {
1301 goto out;
1302 }
1303 if (action_func != NULL) {
1304 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1305 action_func(arg0, arg1);
1306 ml_set_interrupts_enabled(intrs_enabled);
1307 }
1308 call_self = TRUE;
1309 goto out;
1310 }
1311
1312 /*
1313 * Queue the call for each non-local requested cpu.
1314 * This is performed under the topo lock to prevent changes to
1315 * cpus' online state and to prevent concurrent rendezvouses --
1316 * although an exception is made if we're calling only the master
1317 * processor since that always remains active. Note: this exception
1318 * is expected for long-term timer nosync cross-calls to the master cpu.
1319 */
1320 mp_disable_preemption();
1321 intrs_enabled = ml_get_interrupts_enabled();
1322 topo_lock = (cpus != cpu_to_cpumask(master_cpu));
1323 if (topo_lock) {
1324 ml_set_interrupts_enabled(FALSE);
1325 (void) mp_safe_spin_lock(&x86_topo_lock);
1326 }
1327 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1328 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1329 !cpu_is_running(cpu)) {
1330 continue;
1331 }
1332 tsc_spin_start = rdtsc64();
1333 if (cpu == (cpu_t) cpu_number()) {
1334 /*
1335 * We don't IPI ourselves and, if calling asynchronously,
1336 * we defer our call until we have signalled all others.
1337 */
1338 call_self = TRUE;
1339 if (mode == SYNC && action_func != NULL) {
1340 KERNEL_DEBUG_CONSTANT(
1341 TRACE_MP_CPUS_CALL_LOCAL,
1342 VM_KERNEL_UNSLIDE(action_func),
1343 VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
1344 action_func(arg0, arg1);
1345 }
1346 } else {
1347 /*
1348 * Here to queue a call to cpu and IPI.
1349 */
1350 mp_call_t *callp = NULL;
1351 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1352 boolean_t intrs_inner;
1353
1354queue_call:
1355 if (callp == NULL) {
1356 callp = mp_call_alloc();
1357 }
1358 intrs_inner = mp_call_head_lock(cqp);
1359 if (callp == NULL) {
1360 mp_call_head_unlock(cqp, intrs_inner);
1361 KERNEL_DEBUG_CONSTANT(
1362 TRACE_MP_CPUS_CALL_NOBUF,
1363 cpu, 0, 0, 0, 0);
1364 if (!intrs_inner) {
1365 /* Sniffing w/o locking */
1366 if (!queue_empty(&cqp->queue)) {
1367 mp_cpus_call_action();
1368 }
1369 handle_pending_TLB_flushes();
1370 }
1371 if (mp_spin_timeout(tsc_spin_start)) {
1372 panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
1373 tsc_spin_start, rdtsc64());
1374 }
1375 goto queue_call;
1376 }
1377 callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
1378 callp->func = action_func;
1379 callp->arg0 = arg0;
1380 callp->arg1 = arg1;
1381 mp_call_enqueue_locked(cqp, callp);
1382 cpus_call_count++;
1383 cpus_called |= cpu_to_cpumask(cpu);
1384 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1385 mp_call_head_unlock(cqp, intrs_inner);
1386 if (mode == SYNC) {
1387 mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
1388 }
1389 }
1390 }
1391 if (topo_lock) {
1392 simple_unlock(&x86_topo_lock);
1393 ml_set_interrupts_enabled(intrs_enabled);
1394 }
1395
1396 /* Call locally if mode not SYNC */
1397 if (mode != SYNC && call_self) {
1398 KERNEL_DEBUG_CONSTANT(
1399 TRACE_MP_CPUS_CALL_LOCAL,
1400 VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
1401 if (action_func != NULL) {
1402 ml_set_interrupts_enabled(FALSE);
1403 action_func(arg0, arg1);
1404 ml_set_interrupts_enabled(intrs_enabled);
1405 }
1406 }
1407
1408 /* For ASYNC, now wait for all signaled cpus to complete their calls */
1409 if (mode == ASYNC) {
1410 mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
1411 }
1412
1413 /* Safe to allow pre-emption now */
1414 mp_enable_preemption();
1415
1416out:
1417 if (call_self) {
1418 cpus_called |= cpu_to_cpumask(cpu);
1419 cpus_call_count++;
1420 }
1421
1422 if (cpus_calledp) {
1423 *cpus_calledp = cpus_called;
1424 }
1425
1426 KERNEL_DEBUG_CONSTANT(
1427 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
1428 cpus_call_count, cpus_called, 0, 0, 0);
1429
1430 return (cpu_t) cpus_call_count;
1431}
1432
1433
1434static void
1435mp_broadcast_action(__unused void *null)
1436{
1437 /* call action function */
1438 if (mp_bc_action_func != NULL) {
1439 mp_bc_action_func(mp_bc_func_arg);
1440 }
1441
1442 /* if we're the last one through, wake up the instigator */
1443 if (atomic_decl_and_test(&mp_bc_count, 1)) {
1444 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1445 }
1446}
1447
1448/*
1449 * mp_broadcast() runs a given function on all active cpus.
1450 * The caller blocks until the function has run on all cpus.
1451 * The caller will also block if there is another pending broadcast.
1452 */
1453void
1454mp_broadcast(
1455 void (*action_func)(void *),
1456 void *arg)
1457{
1458 if (!smp_initialized) {
1459 if (action_func != NULL) {
1460 action_func(arg);
1461 }
1462 return;
1463 }
1464
1465 /* obtain broadcast lock */
1466 lck_mtx_lock(&mp_bc_lock);
1467
1468 /* set static function pointers */
1469 mp_bc_action_func = action_func;
1470 mp_bc_func_arg = arg;
1471
1472 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
1473
1474 /*
1475 * signal other processors, which will call mp_broadcast_action()
1476 */
1477 mp_bc_count = real_ncpus; /* assume max possible active */
1478 mp_bc_ncpus = mp_cpus_call(CPUMASK_ALL, NOSYNC, *mp_broadcast_action, NULL);
1479 atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus); /* subtract inactive */
1480
1481 /* block for other cpus to have run action_func */
1482 if (mp_bc_ncpus > 1) {
1483 thread_block(THREAD_CONTINUE_NULL);
1484 } else {
1485 clear_wait(current_thread(), THREAD_AWAKENED);
1486 }
1487
1488 /* release lock */
1489 lck_mtx_unlock(&mp_bc_lock);
1490}
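/*
 * Usage sketch (hypothetical counter): unlike mp_cpus_call(), mp_broadcast()
 * may block, so it must be invoked from a thread context that can sleep:
 *
 *	static void
 *	count_active_cpu(void *arg)
 *	{
 *		atomic_incl((volatile long *) arg, 1);
 *	}
 *	...
 *	volatile long active = 0;
 *	mp_broadcast(count_active_cpu, (void *) &active);
 *	// On return, count_active_cpu has run once on every active cpu.
 *
 * Broadcasts are serialized by mp_bc_lock, so a second caller blocks until
 * the first broadcast completes.
 */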
1491
1492void
1493mp_cpus_kick(cpumask_t cpus)
1494{
1495 cpu_t cpu;
1496 boolean_t intrs_enabled = FALSE;
1497
1498 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1499 mp_safe_spin_lock(&x86_topo_lock);
1500
1501 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1502 if (((cpu_to_cpumask(cpu) & cpus) == 0)
1503 || !cpu_is_running(cpu)) {
1504 continue;
1505 }
1506
1507 lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
1508 }
1509
1510 simple_unlock(&x86_topo_lock);
1511 ml_set_interrupts_enabled(intrs_enabled);
1512}
1513
1514void
1515i386_activate_cpu(void)
1516{
1517 cpu_data_t *cdp = current_cpu_datap();
1518
1519 assert(!ml_get_interrupts_enabled());
1520
1521 if (!smp_initialized) {
1522 cdp->cpu_running = TRUE;
1523 return;
1524 }
1525
1526 mp_safe_spin_lock(&x86_topo_lock);
1527 cdp->cpu_running = TRUE;
1528 started_cpu();
1529 pmap_tlbi_range(0, ~0ULL, true, 0);
1530 simple_unlock(&x86_topo_lock);
1531}
1532
1533void
1534i386_deactivate_cpu(void)
1535{
1536 cpu_data_t *cdp = current_cpu_datap();
1537
1538 assert(!ml_get_interrupts_enabled());
1539
1540 KERNEL_DEBUG_CONSTANT(
1541 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
1542 0, 0, 0, 0, 0);
1543
1544 mp_safe_spin_lock(&x86_topo_lock);
1545 cdp->cpu_running = FALSE;
1546 simple_unlock(&x86_topo_lock);
1547
1548 /*
1549 * Move all of this cpu's timers to the master/boot cpu,
1550 * and poke it in case there's a sooner deadline for it to schedule.
1551 */
1552 timer_queue_shutdown(&cdp->rtclock_timer.queue);
1553 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
1554
1555#if MONOTONIC
1556 mt_cpu_down(cdp);
1557#endif /* MONOTONIC */
1558
1559 /*
1560 * Open an interrupt window
1561 * and ensure any pending IPI or timer is serviced
1562 */
1563 mp_disable_preemption();
1564 ml_set_interrupts_enabled(TRUE);
1565
1566 while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime) {
1567 cpu_pause();
1568 }
1569 /*
1570 * Ensure there's no remaining timer deadline set
1571 * - AICPM may have left one active.
1572 */
1573 setPop(0);
1574
1575 ml_set_interrupts_enabled(FALSE);
1576 mp_enable_preemption();
1577
1578 KERNEL_DEBUG_CONSTANT(
1579 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
1580 0, 0, 0, 0, 0);
1581}
1582
1583int pmsafe_debug = 1;
1584
1585#if MACH_KDP
1586volatile boolean_t mp_kdp_trap = FALSE;
1587volatile boolean_t mp_kdp_is_NMI = FALSE;
1588volatile unsigned long mp_kdp_ncpus;
1589boolean_t mp_kdp_state;
1590
1591
1592void
1593mp_kdp_enter(boolean_t proceed_on_failure)
1594{
1595 unsigned int cpu;
1596 unsigned int ncpus = 0;
1597 unsigned int my_cpu;
1598 uint64_t tsc_timeout;
1599
1600 DBG("mp_kdp_enter()\n");
1601
1602 /*
1603 * Here to enter the debugger.
1604 * In case of races, only one cpu is allowed to enter kdp after
1605 * stopping others.
1606 */
1607 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1608 my_cpu = cpu_number();
1609
1610 if (my_cpu == (unsigned) debugger_cpu) {
1611 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1612 kdp_reset();
1613 return;
1614 }
1615
1616 uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1617 int locked = 0;
1618 while (!locked || mp_kdp_trap) {
1619 if (locked) {
1620 simple_unlock(&x86_topo_lock);
1621 }
1622 if (proceed_on_failure) {
1623 if (mach_absolute_time() - start_time > 500000000ll) {
1624 paniclog_append_noflush("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
1625 break;
1626 }
1627 locked = simple_lock_try(&x86_topo_lock, LCK_GRP_NULL);
1628 if (!locked) {
1629 cpu_pause();
1630 }
1631 } else {
1632 mp_safe_spin_lock(&x86_topo_lock);
1633 locked = TRUE;
1634 }
1635
1636 if (locked && mp_kdp_trap) {
1637 simple_unlock(&x86_topo_lock);
1638 DBG("mp_kdp_enter() race lost\n");
1639#if MACH_KDP
1640 mp_kdp_wait(TRUE, FALSE);
1641#endif
1642 locked = FALSE;
1643 }
1644 }
1645
1646 if (pmsafe_debug && !kdp_snapshot) {
1647 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1648 }
1649
1650 debugger_cpu = my_cpu;
1651 ncpus = 1;
1652 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1653 mp_kdp_trap = TRUE;
1654 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1655
1656 /*
1657 * Deliver a nudge to other cpus, counting how many
1658 */
1659 DBG("mp_kdp_enter() signaling other processors\n");
1660 if (force_immediate_debugger_NMI == FALSE) {
1661 for (cpu = 0; cpu < real_ncpus; cpu++) {
1662 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1663 continue;
1664 }
1665 ncpus++;
1666 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1667 }
1668 /*
1669 * Wait for the other processors to synchronize
1670 */
1671 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1672
1673 /*
1674 * This timeout is rather arbitrary; we don't want to NMI
1675 * processors that are executing at potentially
1676 * "unsafe-to-interrupt" points such as the trampolines,
1677 * but neither do we want to lose state by waiting too long.
1678 */
1679 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1680
1681 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1682 /*
1683 * A TLB shootdown request may be pending--this would
1684 * result in the requesting processor waiting in
1685 * PMAP_UPDATE_TLBS() until this processor deals with it.
1686 * Process it, so it can now enter mp_kdp_wait()
1687 */
1688 handle_pending_TLB_flushes();
1689 cpu_pause();
1690 }
1691 /* If we've timed out, and some processor(s) are still unresponsive,
1692 * interrupt them with an NMI via the local APIC, iff a panic is
1693 * in progress.
1694 */
1695 if (panic_active()) {
1696 NMIPI_enable(TRUE);
1697 }
1698 if (mp_kdp_ncpus != ncpus) {
1699 unsigned int wait_cycles = 0;
1700 if (proceed_on_failure) {
1701 paniclog_append_noflush("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
1702 } else {
1703 DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
1704 }
1705 for (cpu = 0; cpu < real_ncpus; cpu++) {
1706 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1707 continue;
1708 }
1709 if (cpu_signal_pending(cpu, MP_KDP)) {
1710 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
1711 cpu_NMI_interrupt(cpu);
1712 }
1713 }
1714 /* Wait again for the same timeout */
1715 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1716 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1717 handle_pending_TLB_flushes();
1718 cpu_pause();
1719 ++wait_cycles;
1720 }
1721 if (mp_kdp_ncpus != ncpus) {
1722 paniclog_append_noflush("mp_kdp_enter() NMI pending on cpus:");
1723 for (cpu = 0; cpu < real_ncpus; cpu++) {
1724 if (cpu_is_running(cpu) && !cpu_datap(cpu)->cpu_NMI_acknowledged) {
1725 paniclog_append_noflush(" %d", cpu);
1726 }
1727 }
1728 paniclog_append_noflush("\n");
1729 if (proceed_on_failure) {
1730 paniclog_append_noflush("mp_kdp_enter() timed-out during %s wait after NMI;"
1731 "expected %u acks but received %lu after %u loops in %llu ticks\n",
1732 (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
1733 } else {
1734 panic("mp_kdp_enter() timed-out during %s wait after NMI;"
1735 "expected %u acks but received %lu after %u loops in %llu ticks",
1736 (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
1737 }
1738 }
1739 }
1740 } else {
1741 for (cpu = 0; cpu < real_ncpus; cpu++) {
1742 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1743 continue;
1744 }
1745 cpu_NMI_interrupt(cpu);
1746 }
1747 }
1748
1749 if (locked) {
1750 simple_unlock(&x86_topo_lock);
1751 }
1752
1753 DBG("mp_kdp_enter() %d processors done %s\n",
1754 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1755
1756 postcode(MP_KDP_ENTER);
1757}
1758
1759boolean_t
1760mp_kdp_all_cpus_halted()
1761{
1762 unsigned int ncpus = 0, cpu = 0, my_cpu = 0;
1763
1764 my_cpu = cpu_number();
1765 ncpus = 1; /* current CPU */
1766 for (cpu = 0; cpu < real_ncpus; cpu++) {
1767 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1768 continue;
1769 }
1770 ncpus++;
1771 }
1772
1773 return mp_kdp_ncpus == ncpus;
1774}
1775
1776static boolean_t
1777cpu_signal_pending(int cpu, mp_event_t event)
1778{
1779 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1780 boolean_t retval = FALSE;
1781
1782 if (i_bit(event, signals)) {
1783 retval = TRUE;
1784 }
1785 return retval;
1786}
1787
1788long
1789kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1790 void *arg0, void *arg1)
1791{
1792 if (lcpu > (real_ncpus - 1)) {
1793 return -1;
1794 }
1795
1796 if (func == NULL) {
1797 return -1;
1798 }
1799
1800 kdp_xcpu_call_func.func = func;
1801 kdp_xcpu_call_func.ret = -1;
1802 kdp_xcpu_call_func.arg0 = arg0;
1803 kdp_xcpu_call_func.arg1 = arg1;
1804 kdp_xcpu_call_func.cpu = lcpu;
1805 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1806 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE) {
1807 cpu_pause();
1808 }
1809 return kdp_xcpu_call_func.ret;
1810}
1811
1812static void
1813kdp_x86_xcpu_poll(void)
1814{
1815 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1816 kdp_xcpu_call_func.ret =
1817 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1818 kdp_xcpu_call_func.arg1,
1819 cpu_number());
1820 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1821 }
1822}
1823
1824static void
1825mp_kdp_wait(boolean_t flush, boolean_t isNMI)
1826{
1827 DBG("mp_kdp_wait()\n");
1828
1829 current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
1830#if CONFIG_MCA
1831 /* If we've trapped due to a machine-check, save MCA registers */
1832 mca_check_save();
1833#endif
1834
1835 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1836 while (mp_kdp_trap || (isNMI == TRUE)) {
1837 /*
1838 * A TLB shootdown request may be pending--this would result
1839 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1840 * until this processor handles it.
1841 * Process it, so it can now enter mp_kdp_wait()
1842 */
1843 if (flush) {
1844 handle_pending_TLB_flushes();
1845 }
1846
1847 kdp_x86_xcpu_poll();
1848 cpu_pause();
1849 }
1850
1851 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1852 DBG("mp_kdp_wait() done\n");
1853}
1854
1855void
1856mp_kdp_exit(void)
1857{
1858 DBG("mp_kdp_exit()\n");
1859 debugger_cpu = -1;
1860 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1861
1862 debugger_exit_time = mach_absolute_time();
1863
1864 mp_kdp_trap = FALSE;
1865 mfence();
1866
1867 /* Wait for the other processors to stop spinning. XXX needs timeout */
1868 DBG("mp_kdp_exit() waiting for processors to resume\n");
1869 while (mp_kdp_ncpus > 0) {
1870 /*
1871 * A TLB shootdown request may be pending... this would result in the requesting
1872 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1873 * Process it, so the requester can make progress.
1874 */
1875 handle_pending_TLB_flushes();
1876
1877 cpu_pause();
1878 }
1879
1880 if (pmsafe_debug && !kdp_snapshot) {
1881 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1882 }
1883
1884 debugger_exit_time = mach_absolute_time();
1885
1886 DBG("mp_kdp_exit() done\n");
1887 (void) ml_set_interrupts_enabled(mp_kdp_state);
1888 postcode(MP_KDP_EXIT);
1889}
1890
1891#endif /* MACH_KDP */
1892
1893boolean_t
1894mp_recent_debugger_activity(void)
1895{
1896 uint64_t abstime = mach_absolute_time();
1897 return ((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1898 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance);
1899}
1900
1901/*ARGSUSED*/
1902void
1903init_ast_check(
1904 __unused processor_t processor)
1905{
1906}
1907
1908void
1909cause_ast_check(
1910 processor_t processor)
1911{
1912 int cpu = processor->cpu_id;
1913
1914 if (cpu != cpu_number()) {
1915 i386_signal_cpu(cpu, MP_AST, ASYNC);
1916 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1917 }
1918}
1919
1920void
1921slave_machine_init(void *param)
1922{
1923 /*
1924 * Here in process context, but with interrupts disabled.
1925 */
1926 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1927
1928 if (param == FULL_SLAVE_INIT) {
1929 /*
1930 * Cold start
1931 */
1932 clock_init();
1933 }
1934 cpu_machine_init(); /* Interrupts enabled hereafter */
1935}
1936
1937#undef cpu_number
1938int
1939cpu_number(void)
1940{
1941 return get_cpu_number();
1942}
1943
1944vm_offset_t
1945current_percpu_base(void)
1946{
1947 return get_current_percpu_base();
1948}
1949
1950static void
1951cpu_prewarm_init()
1952{
1953 int i;
1954
1955 simple_lock_init(&cpu_warm_lock, 0);
1956 queue_init(&cpu_warm_call_list);
1957 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1958 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1959 }
1960}
1961
1962static timer_call_t
1963grab_warm_timer_call()
1964{
1965 spl_t x;
1966 timer_call_t call = NULL;
1967
1968 x = splsched();
1969 simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
1970 if (!queue_empty(&cpu_warm_call_list)) {
1971 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1972 }
1973 simple_unlock(&cpu_warm_lock);
1974 splx(x);
1975
1976 return call;
1977}
1978
1979static void
1980free_warm_timer_call(timer_call_t call)
1981{
1982 spl_t x;
1983
1984 x = splsched();
1985 simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
1986 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1987 simple_unlock(&cpu_warm_lock);
1988 splx(x);
1989}
1990
1991/*
1992 * Runs in timer call context (interrupts disabled).
1993 */
1994static void
1995cpu_warm_timer_call_func(
1996 timer_call_param_t p0,
1997 __unused timer_call_param_t p1)
1998{
1999 free_warm_timer_call((timer_call_t)p0);
2000 return;
2001}
2002
2003/*
2004 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
2005 */
2006static void
2007_cpu_warm_setup(
2008 void *arg)
2009{
2010 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
2011
2012 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
2013 cwdp->cwd_result = 0;
2014
2015 return;
2016}
2017
2018/*
2019 * Not safe to call with interrupts disabled.
2020 */
2021kern_return_t
2022ml_interrupt_prewarm(
2023 uint64_t deadline)
2024{
2025 struct cpu_warm_data cwd;
2026 timer_call_t call;
2027 cpu_t ct;
2028
2029 if (ml_get_interrupts_enabled() == FALSE) {
2030 panic("%s: Interrupts disabled?\n", __FUNCTION__);
2031 }
2032
2033 /*
2034 * If the platform doesn't need our help, say that we succeeded.
2035 */
2036 if (!ml_get_interrupt_prewake_applicable()) {
2037 return KERN_SUCCESS;
2038 }
2039
2040 /*
2041 * Grab a timer call to use.
2042 */
2043 call = grab_warm_timer_call();
2044 if (call == NULL) {
2045 return KERN_RESOURCE_SHORTAGE;
2046 }
2047
2048 timer_call_setup(call, cpu_warm_timer_call_func, call);
2049 cwd.cwd_call = call;
2050 cwd.cwd_deadline = deadline;
2051 cwd.cwd_result = 0;
2052
2053 /*
2054 * For now, non-local interrupts happen on the master processor.
2055 */
2056 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
2057 if (ct == 0) {
2058 free_warm_timer_call(call);
2059 return KERN_FAILURE;
2060 } else {
2061 return cwd.cwd_result;
2062 }
2063}
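/*
 * Usage sketch (hypothetical deadline): a driver expecting a device interrupt
 * shortly after wake can ask the master cpu to arm a local timer just ahead
 * of that interrupt so the cpu is out of deep idle when it arrives:
 *
 *	uint64_t interval, deadline;
 *	nanoseconds_to_absolutetime(500 * NSEC_PER_USEC, &interval);	// ~500us out
 *	deadline = mach_absolute_time() + interval;
 *	(void) ml_interrupt_prewarm(deadline);
 *
 * Must be called with interrupts enabled; KERN_RESOURCE_SHORTAGE is returned
 * if all NUM_CPU_WARM_CALLS warm timer calls are already in flight.
 */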
2064
2065#if DEBUG || DEVELOPMENT
2066void
2067kernel_spin(uint64_t spin_ns)
2068{
2069 boolean_t istate;
2070 uint64_t spin_abs;
2071 uint64_t deadline;
2072 cpu_data_t *cdp;
2073
2074 kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
2075 istate = ml_set_interrupts_enabled(FALSE);
2076 cdp = current_cpu_datap();
2077 nanoseconds_to_absolutetime(spin_ns, &spin_abs);
2078
2079 /* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
2080 cdp->cpu_int_event_time = mach_absolute_time();
2081 cdp->cpu_int_state = (void *) USER_STATE(current_thread());
2082
2083 deadline = mach_absolute_time() + spin_ns;
2084 while (mach_absolute_time() < deadline) {
2085 cpu_pause();
2086 }
2087
2088 cdp->cpu_int_event_time = 0;
2089 cdp->cpu_int_state = NULL;
2090
2091 ml_set_interrupts_enabled(istate);
2092 kprintf("kernel_spin() continuing\n");
2093}
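/*
 * Hypothetical test sketch: because kernel_spin() fakes interrupt-handler
 * state above, spinning for more than a scheduler quantum should make
 * mp_interrupt_watchdog() (run from the scheduler maintenance thread on
 * another cpu) log a report, and spinning well past LockTimeOut should make
 * it panic:
 *
 *	kernel_spin(50 * NSEC_PER_MSEC);	// expect a watchdog log
 *	kernel_spin(5 * NSEC_PER_SEC);		// expect a watchdog panic
 *
 * The durations are illustrative; the real thresholds are std_quantum and
 * LockTimeOut as checked below.
 */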
2094
2095/*
2096 * Called from the scheduler's maintenance thread to
2097 * scan running processors for long-running ISRs and:
2098 * - panic if longer than LockTimeOut, or
2099 * - log if more than a quantum.
2100 */
2101void
2102mp_interrupt_watchdog(void)
2103{
2104 cpu_t cpu;
2105 boolean_t intrs_enabled = FALSE;
2106 uint16_t cpu_int_num;
2107 uint64_t cpu_int_event_time;
2108 uint64_t cpu_rip;
2109 uint64_t cpu_int_duration;
2110 uint64_t now;
2111 x86_saved_state_t *cpu_int_state;
2112
2113 if (__improbable(!mp_interrupt_watchdog_enabled)) {
2114 return;
2115 }
2116
2117 intrs_enabled = ml_set_interrupts_enabled(FALSE);
2118 now = mach_absolute_time();
2119 /*
2120 * While timeouts are not suspended,
2121 * check all other processors for long outstanding interrupt handling.
2122 */
2123 for (cpu = 0;
2124 cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
2125 cpu++) {
2126 if ((cpu == (cpu_t) cpu_number()) ||
2127 (!cpu_is_running(cpu))) {
2128 continue;
2129 }
2130 cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
2131 if (cpu_int_event_time == 0) {
2132 continue;
2133 }
2134 if (__improbable(now < cpu_int_event_time)) {
2135 continue; /* skip due to inter-processor skew */
2136 }
2137 cpu_int_state = cpu_datap(cpu)->cpu_int_state;
2138 if (__improbable(cpu_int_state == NULL)) {
2139 /* The interrupt may have been dismissed */
2140 continue;
2141 }
2142
2143 /* Here with a cpu handling an interrupt */
2144
2145 cpu_int_duration = now - cpu_int_event_time;
2146 if (__improbable(cpu_int_duration > LockTimeOut)) {
2147 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2148 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2149 vector_timed_out = cpu_int_num;
2150 NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
2151 panic("Interrupt watchdog, "
2152 "cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
2153 cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
2154 /* NOT REACHED */
2155 } else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
2156 mp_interrupt_watchdog_events++;
2157 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2158 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2159 ml_set_interrupts_enabled(intrs_enabled);
2160 printf("Interrupt watchdog, "
2161 "cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
2162 cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
2163 return;
2164 }
2165 }
2166
2167 ml_set_interrupts_enabled(intrs_enabled);
2168}
2169#endif