apple/xnu (xnu-4570.51.1): osfmk/i386/mp.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31
32 #include <mach_kdp.h>
33 #include <kdp/kdp_internal.h>
34 #include <mach_ldebug.h>
35 #include <gprof.h>
36
37 #include <mach/mach_types.h>
38 #include <mach/kern_return.h>
39
40 #include <kern/kern_types.h>
41 #include <kern/startup.h>
42 #include <kern/timer_queue.h>
43 #include <kern/processor.h>
44 #include <kern/cpu_number.h>
45 #include <kern/cpu_data.h>
46 #include <kern/assert.h>
47 #include <kern/machine.h>
48 #include <kern/pms.h>
49 #include <kern/misc_protos.h>
50 #include <kern/timer_call.h>
51 #include <kern/kalloc.h>
52 #include <kern/queue.h>
53 #include <prng/random.h>
54
55 #include <vm/vm_map.h>
56 #include <vm/vm_kern.h>
57
58 #include <profiling/profile-mk.h>
59
60 #include <i386/bit_routines.h>
61 #include <i386/proc_reg.h>
62 #include <i386/cpu_threads.h>
63 #include <i386/mp_desc.h>
64 #include <i386/misc_protos.h>
65 #include <i386/trap.h>
66 #include <i386/postcode.h>
67 #include <i386/machine_routines.h>
68 #include <i386/mp.h>
69 #include <i386/mp_events.h>
70 #include <i386/lapic.h>
71 #include <i386/cpuid.h>
72 #include <i386/fpu.h>
73 #include <i386/machine_cpu.h>
74 #include <i386/pmCPU.h>
75 #if CONFIG_MCA
76 #include <i386/machine_check.h>
77 #endif
78 #include <i386/acpi.h>
79
80 #include <sys/kdebug.h>
81
82 #include <console/serial_protos.h>
83
84 #if MONOTONIC
85 #include <kern/monotonic.h>
86 #endif /* MONOTONIC */
87
88 #if MP_DEBUG
89 #define PAUSE delay(1000000)
90 #define DBG(x...) kprintf(x)
91 #else
92 #define DBG(x...)
93 #define PAUSE
94 #endif /* MP_DEBUG */
95
96 /* Debugging/test trace events: */
97 #define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
98 #define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
99 #define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
100 #define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
101 #define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
102 #define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
103 #define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
104 #define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
105
106 #define ABS(v) (((v) > 0)?(v):-(v))
107
108 void slave_boot_init(void);
109 void i386_cpu_IPI(int cpu);
110
111 #if MACH_KDP
112 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
113 #endif /* MACH_KDP */
114
115 #if MACH_KDP
116 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
117 #endif /* MACH_KDP */
118 static int NMIInterruptHandler(x86_saved_state_t *regs);
119
120 boolean_t smp_initialized = FALSE;
121 uint32_t TSC_sync_margin = 0xFFF;
122 volatile boolean_t force_immediate_debugger_NMI = FALSE;
123 volatile boolean_t pmap_tlb_flush_timeout = FALSE;
124 #if DEBUG || DEVELOPMENT
125 boolean_t mp_interrupt_watchdog_enabled = TRUE;
126 uint32_t mp_interrupt_watchdog_events = 0;
127 #endif
128
129 decl_simple_lock_data(,debugger_callback_lock);
130 struct debugger_callback *debugger_callback = NULL;
131
132 decl_lck_mtx_data(static, mp_cpu_boot_lock);
133 lck_mtx_ext_t mp_cpu_boot_lock_ext;
134
135 /* Variables needed for MP rendezvous. */
136 decl_simple_lock_data(,mp_rv_lock);
137 static void (*mp_rv_setup_func)(void *arg);
138 static void (*mp_rv_action_func)(void *arg);
139 static void (*mp_rv_teardown_func)(void *arg);
140 static void *mp_rv_func_arg;
141 static volatile int mp_rv_ncpus;
142 /* Cache-aligned barriers: */
143 static volatile long mp_rv_entry __attribute__((aligned(64)));
144 static volatile long mp_rv_exit __attribute__((aligned(64)));
145 static volatile long mp_rv_complete __attribute__((aligned(64)));
146
147 volatile uint64_t debugger_entry_time;
148 volatile uint64_t debugger_exit_time;
149 #if MACH_KDP
150 #include <kdp/kdp.h>
151 extern int kdp_snapshot;
152 static struct _kdp_xcpu_call_func {
153 kdp_x86_xcpu_func_t func;
154 void *arg0, *arg1;
155 volatile long ret;
156 volatile uint16_t cpu;
157 } kdp_xcpu_call_func = {
158 .cpu = KDP_XCPU_NONE
159 };
160
161 #endif
162
163 /* Variables needed for MP broadcast. */
164 static void (*mp_bc_action_func)(void *arg);
165 static void *mp_bc_func_arg;
166 static int mp_bc_ncpus;
167 static volatile long mp_bc_count;
168 decl_lck_mtx_data(static, mp_bc_lock);
169 lck_mtx_ext_t mp_bc_lock_ext;
170 static volatile int debugger_cpu = -1;
171 volatile long NMIPI_acks = 0;
172 volatile long NMI_count = 0;
173 static NMI_reason_t NMI_panic_reason = NONE;
174 static int vector_timed_out;
175
176 extern void NMI_cpus(void);
177
178 static void mp_cpus_call_init(void);
179 static void mp_cpus_call_action(void);
180 static void mp_call_PM(void);
181
182 char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
183
184 /* PAL-related routines */
185 boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
186 int ipi_vector, i386_intr_func_t ipi_handler);
187 void i386_start_cpu(int lapic_id, int cpu_num);
188 void i386_send_NMI(int cpu);
189 void NMIPI_enable(boolean_t);
190 #if GPROF
191 /*
192 * Initialize dummy structs for profiling. These aren't used but
193  * allow hertz_tick() to be built with GPROF defined.
194 */
195 struct profile_vars _profile_vars;
196 struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
197 #define GPROF_INIT() \
198 { \
199 int i; \
200 \
201 /* Hack to initialize pointers to unused profiling structs */ \
202 for (i = 1; i < MAX_CPUS; i++) \
203 _profile_vars_cpus[i] = &_profile_vars; \
204 }
205 #else
206 #define GPROF_INIT()
207 #endif /* GPROF */
208
209 static lck_grp_t smp_lck_grp;
210 static lck_grp_attr_t smp_lck_grp_attr;
211
212 #define NUM_CPU_WARM_CALLS 20
213 struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
214 queue_head_t cpu_warm_call_list;
215 decl_simple_lock_data(static, cpu_warm_lock);
216
217 typedef struct cpu_warm_data {
218 timer_call_t cwd_call;
219 uint64_t cwd_deadline;
220 int cwd_result;
221 } *cpu_warm_data_t;
222
223 static void cpu_prewarm_init(void);
224 static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
225 static void _cpu_warm_setup(void *arg);
226 static timer_call_t grab_warm_timer_call(void);
227 static void free_warm_timer_call(timer_call_t call);
228
229 void
230 smp_init(void)
231 {
232 simple_lock_init(&mp_rv_lock, 0);
233 simple_lock_init(&debugger_callback_lock, 0);
234 lck_grp_attr_setdefault(&smp_lck_grp_attr);
235 lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
236 lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
237 lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
238 console_init();
239
240 if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
241 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
242 return;
243
244 cpu_thread_init();
245
246 GPROF_INIT();
247 DBGLOG_CPU_INIT(master_cpu);
248
249 mp_cpus_call_init();
250 mp_cpus_call_cpu_init(master_cpu);
251
252 #if DEBUG || DEVELOPMENT
253 if (PE_parse_boot_argn("interrupt_watchdog",
254 &mp_interrupt_watchdog_enabled,
255 sizeof(mp_interrupt_watchdog_enabled))) {
256 kprintf("Interrupt watchdog %sabled\n",
257 mp_interrupt_watchdog_enabled ? "en" : "dis");
258 }
259 #endif
260
261 if (PE_parse_boot_argn("TSC_sync_margin",
262 &TSC_sync_margin, sizeof(TSC_sync_margin))) {
263 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
264 } else if (cpuid_vmm_present()) {
265 kprintf("TSC sync margin disabled\n");
266 TSC_sync_margin = 0;
267 }
268 smp_initialized = TRUE;
269
270 cpu_prewarm_init();
271
272 return;
273 }
274
275 typedef struct {
276 int target_cpu;
277 int target_lapic;
278 int starter_cpu;
279 } processor_start_info_t;
280 static processor_start_info_t start_info __attribute__((aligned(64)));
281
282 /*
283 * Cache-alignment is to avoid cross-cpu false-sharing interference.
284 */
285 static volatile long tsc_entry_barrier __attribute__((aligned(64)));
286 static volatile long tsc_exit_barrier __attribute__((aligned(64)));
287 static volatile uint64_t tsc_target __attribute__((aligned(64)));
288
289 /*
290 * Poll a CPU to see when it has marked itself as running.
291 */
292 static void
293 mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
294 {
295 while (iters-- > 0) {
296 if (cpu_datap(slot_num)->cpu_running)
297 break;
298 delay(usecdelay);
299 }
300 }
301
302 /*
303 * Quickly bring a CPU back online which has been halted.
304 */
305 kern_return_t
306 intel_startCPU_fast(int slot_num)
307 {
308 kern_return_t rc;
309
310 /*
311 * Try to perform a fast restart
312 */
313 rc = pmCPUExitHalt(slot_num);
314 if (rc != KERN_SUCCESS)
315 /*
316 * The CPU was not eligible for a fast restart.
317 */
318 return(rc);
319
320 KERNEL_DEBUG_CONSTANT(
321 TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
322 slot_num, 0, 0, 0, 0);
323
324 /*
325 * Wait until the CPU is back online.
326 */
327 mp_disable_preemption();
328
329 /*
330 * We use short pauses (1us) for low latency. 30,000 iterations is
331  * longer than a full restart would require, so it should be more
332 * than long enough.
333 */
334
335 mp_wait_for_cpu_up(slot_num, 30000, 1);
336 mp_enable_preemption();
337
338 KERNEL_DEBUG_CONSTANT(
339 TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
340 slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
341
342 /*
343 * Check to make sure that the CPU is really running. If not,
344 * go through the slow path.
345 */
346 if (cpu_datap(slot_num)->cpu_running)
347 return(KERN_SUCCESS);
348 else
349 return(KERN_FAILURE);
350 }
351
352 static void
353 started_cpu(void)
354 {
355 /* Here on the started cpu with cpu_running set TRUE */
356
357 if (TSC_sync_margin &&
358 start_info.target_cpu == cpu_number()) {
359 /*
360 * I've just started-up, synchronize again with the starter cpu
361 * and then snap my TSC.
362 */
363 tsc_target = 0;
364 atomic_decl(&tsc_entry_barrier, 1);
365 while (tsc_entry_barrier != 0)
366 ; /* spin for starter and target at barrier */
367 tsc_target = rdtsc64();
368 atomic_decl(&tsc_exit_barrier, 1);
369 }
370 }
371
372 static void
373 start_cpu(void *arg)
374 {
375 int i = 1000;
376 processor_start_info_t *psip = (processor_start_info_t *) arg;
377
378 /* Ignore this if the current processor is not the starter */
379 if (cpu_number() != psip->starter_cpu)
380 return;
381
382 DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
383 arg, psip->target_cpu, psip->target_lapic);
384
385 KERNEL_DEBUG_CONSTANT(
386 TRACE_MP_CPU_START | DBG_FUNC_START,
387 psip->target_cpu,
388 psip->target_lapic, 0, 0, 0);
389
390 i386_start_cpu(psip->target_lapic, psip->target_cpu);
391
392 #ifdef POSTCODE_DELAY
393 /* Wait much longer if postcodes are displayed for a delay period. */
394 i *= 10000;
395 #endif
396 DBG("start_cpu(%p) about to wait for cpu %d\n",
397 arg, psip->target_cpu);
398
399 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
400
401 KERNEL_DEBUG_CONSTANT(
402 TRACE_MP_CPU_START | DBG_FUNC_END,
403 psip->target_cpu,
404 cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
405
406 if (TSC_sync_margin &&
407 cpu_datap(psip->target_cpu)->cpu_running) {
408 /*
409 * Compare the TSC from the started processor with ours.
410 * Report and log/panic if it diverges by more than
411 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
412  * can be overridden by boot-arg (with 0 meaning no checking).
413 */
414 uint64_t tsc_starter;
415 int64_t tsc_delta;
416 atomic_decl(&tsc_entry_barrier, 1);
417 while (tsc_entry_barrier != 0)
418 ; /* spin for both processors at barrier */
419 tsc_starter = rdtsc64();
420 atomic_decl(&tsc_exit_barrier, 1);
421 while (tsc_exit_barrier != 0)
422 ; /* spin for target to store its TSC */
423 tsc_delta = tsc_target - tsc_starter;
424 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
425 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
426 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
427 #if DEBUG
428 panic(
429 #else
430 printf(
431 #endif
432 "Unsynchronized TSC for cpu %d: "
433 "0x%016llx, delta 0x%llx\n",
434 psip->target_cpu, tsc_target, tsc_delta);
435 }
436 }
437 }
438
439 kern_return_t
440 intel_startCPU(
441 int slot_num)
442 {
443 int lapic = cpu_to_lapic[slot_num];
444 boolean_t istate;
445
446 assert(lapic != -1);
447
448 DBGLOG_CPU_INIT(slot_num);
449
450 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
451 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
452
453 /*
454 * Initialize (or re-initialize) the descriptor tables for this cpu.
455 * Propagate processor mode to slave.
456 */
457 cpu_desc_init(cpu_datap(slot_num));
458
459 /* Serialize use of the slave boot stack, etc. */
460 lck_mtx_lock(&mp_cpu_boot_lock);
461
462 istate = ml_set_interrupts_enabled(FALSE);
463 if (slot_num == get_cpu_number()) {
464 ml_set_interrupts_enabled(istate);
465 lck_mtx_unlock(&mp_cpu_boot_lock);
466 return KERN_SUCCESS;
467 }
468
469 start_info.starter_cpu = cpu_number();
470 start_info.target_cpu = slot_num;
471 start_info.target_lapic = lapic;
472 tsc_entry_barrier = 2;
473 tsc_exit_barrier = 2;
474
475 /*
476 * Perform the processor startup sequence with all running
477 * processors rendezvous'ed. This is required during periods when
478 * the cache-disable bit is set for MTRR/PAT initialization.
479 */
480 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
481
482 start_info.target_cpu = 0;
483
484 ml_set_interrupts_enabled(istate);
485 lck_mtx_unlock(&mp_cpu_boot_lock);
486
487 if (!cpu_datap(slot_num)->cpu_running) {
488 kprintf("Failed to start CPU %02d\n", slot_num);
489 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
490 delay(1000000);
491 halt_cpu();
492 return KERN_SUCCESS;
493 } else {
494 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
495 return KERN_SUCCESS;
496 }
497 }
498
499 #if MP_DEBUG
500 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
501 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
502
503 MP_EVENT_NAME_DECL();
504
505 #endif /* MP_DEBUG */
506
507 /*
508 * Note: called with NULL state when polling for TLB flush and cross-calls.
509 */
510 int
511 cpu_signal_handler(x86_saved_state_t *regs)
512 {
513 #if !MACH_KDP
514 #pragma unused (regs)
515 #endif /* !MACH_KDP */
516 int my_cpu;
517 volatile int *my_word;
518
519 SCHED_STATS_IPI(current_processor());
520
521 my_cpu = cpu_number();
522 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
523 /* Store the initial set of signals for diagnostics. New
524 * signals could arrive while these are being processed
525 * so it's no more than a hint.
526 */
527
528 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
529
530 do {
531 #if MACH_KDP
532 if (i_bit(MP_KDP, my_word)) {
533 DBGLOG(cpu_handle,my_cpu,MP_KDP);
534 i_bit_clear(MP_KDP, my_word);
535 /* Ensure that the i386_kernel_state at the base of the
536 * current thread's stack (if any) is synchronized with the
537 * context at the moment of the interrupt, to facilitate
538 * access through the debugger.
539 */
540 sync_iss_to_iks(regs);
541 if (pmsafe_debug && !kdp_snapshot)
542 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
543 mp_kdp_wait(TRUE, FALSE);
544 if (pmsafe_debug && !kdp_snapshot)
545 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
546 } else
547 #endif /* MACH_KDP */
548 if (i_bit(MP_TLB_FLUSH, my_word)) {
549 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
550 i_bit_clear(MP_TLB_FLUSH, my_word);
551 pmap_update_interrupt();
552 } else if (i_bit(MP_CALL, my_word)) {
553 DBGLOG(cpu_handle,my_cpu,MP_CALL);
554 i_bit_clear(MP_CALL, my_word);
555 mp_cpus_call_action();
556 } else if (i_bit(MP_CALL_PM, my_word)) {
557 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
558 i_bit_clear(MP_CALL_PM, my_word);
559 mp_call_PM();
560 }
561 if (regs == NULL) {
562 /* Called to poll only for cross-calls and TLB flush */
563 break;
564 } else if (i_bit(MP_AST, my_word)) {
565 DBGLOG(cpu_handle,my_cpu,MP_AST);
566 i_bit_clear(MP_AST, my_word);
567 ast_check(cpu_to_processor(my_cpu));
568 }
569 } while (*my_word);
570
571 return 0;
572 }
573
574 extern void kprintf_break_lock(void);
575 static int
576 NMIInterruptHandler(x86_saved_state_t *regs)
577 {
578 void *stackptr;
579 char pstr[192];
580 uint64_t now = mach_absolute_time();
581
582 if (panic_active() && !panicDebugging) {
583 if (pmsafe_debug)
584 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
585 for(;;)
586 cpu_pause();
587 }
588
589 atomic_incl(&NMIPI_acks, 1);
590 atomic_incl(&NMI_count, 1);
591 sync_iss_to_iks_unconditionally(regs);
592 __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
593
594 if (cpu_number() == debugger_cpu)
595 goto NMExit;
596
597 if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
598 snprintf(&pstr[0], sizeof(pstr),
599 "Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
600 cpu_number(), now, spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
601 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
602 } else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
603 snprintf(&pstr[0], sizeof(pstr),
604 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
605 cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
606 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
607 } else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
608 snprintf(&pstr[0], sizeof(pstr),
609 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
610 cpu_number(), now);
611 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
612 } else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
613 snprintf(&pstr[0], sizeof(pstr),
614 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
615 cpu_number(), now, vector_timed_out);
616 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
617 }
618
619 #if MACH_KDP
620 if (pmsafe_debug && !kdp_snapshot)
621 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
622 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
623 i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
624 if (panic_active() || NMI_panic_reason != NONE) {
625 mp_kdp_wait(FALSE, TRUE);
626 } else if (!mp_kdp_trap &&
627 !mp_kdp_is_NMI &&
628 virtualized && (debug_boot_arg & DB_NMI)) {
629 /*
630 * Under a VMM with the debug boot-arg set, drop into kdp.
631 * Since an NMI is involved, there's a risk of contending with
632 * a panic. And side-effects of NMIs may result in entry into,
633 * and continuing from, the debugger being unreliable.
634 */
635 if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
636 kprintf_break_lock();
637 kprintf("Debugger entry requested by NMI\n");
638 kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
639 printf("Debugger entry requested by NMI\n");
640 mp_kdp_is_NMI = FALSE;
641 } else {
642 mp_kdp_wait(FALSE, FALSE);
643 }
644 } else {
645 mp_kdp_wait(FALSE, FALSE);
646 }
647 if (pmsafe_debug && !kdp_snapshot)
648 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
649 #endif
650 NMExit:
651 return 1;
652 }
653
654
655 /*
656 * cpu_interrupt is really just to be used by the scheduler to
657  * get a CPU's attention; it may not always issue an IPI. If an
658 * IPI is always needed then use i386_cpu_IPI.
659 */
660 void
661 cpu_interrupt(int cpu)
662 {
663 boolean_t did_IPI = FALSE;
664
665 if (smp_initialized
666 && pmCPUExitIdle(cpu_datap(cpu))) {
667 i386_cpu_IPI(cpu);
668 did_IPI = TRUE;
669 }
670
671 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
672 }
673
674 /*
675 * Send a true NMI via the local APIC to the specified CPU.
676 */
677 void
678 cpu_NMI_interrupt(int cpu)
679 {
680 if (smp_initialized) {
681 i386_send_NMI(cpu);
682 }
683 }
684
685 void
686 NMI_cpus(void)
687 {
688 unsigned int cpu;
689 boolean_t intrs_enabled;
690 uint64_t tsc_timeout;
691
692 intrs_enabled = ml_set_interrupts_enabled(FALSE);
693
694 for (cpu = 0; cpu < real_ncpus; cpu++) {
695 if (!cpu_is_running(cpu))
696 continue;
697 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
698 cpu_NMI_interrupt(cpu);
699 tsc_timeout = !machine_timeout_suspended() ?
700 rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
701 ~0ULL;
702 while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
703 handle_pending_TLB_flushes();
704 cpu_pause();
705 if (rdtsc64() > tsc_timeout)
706 panic("NMI_cpus() timeout cpu %d", cpu);
707 }
708 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
709 }
710
711 ml_set_interrupts_enabled(intrs_enabled);
712 }
713
714 static void (* volatile mp_PM_func)(void) = NULL;
715
716 static void
717 mp_call_PM(void)
718 {
719 assert(!ml_get_interrupts_enabled());
720
721 if (mp_PM_func != NULL)
722 mp_PM_func();
723 }
724
725 void
726 cpu_PM_interrupt(int cpu)
727 {
728 assert(!ml_get_interrupts_enabled());
729
730 if (mp_PM_func != NULL) {
731 if (cpu == cpu_number())
732 mp_PM_func();
733 else
734 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
735 }
736 }
737
738 void
739 PM_interrupt_register(void (*fn)(void))
740 {
741 mp_PM_func = fn;
742 }
743
744 void
745 i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
746 {
747 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
748 uint64_t tsc_timeout;
749
750
751 if (!cpu_datap(cpu)->cpu_running)
752 return;
753
754 if (event == MP_TLB_FLUSH)
755 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
756
757 DBGLOG(cpu_signal, cpu, event);
758
759 i_bit_set(event, signals);
760 i386_cpu_IPI(cpu);
761 if (mode == SYNC) {
762 again:
763 tsc_timeout = !machine_timeout_suspended() ?
764 rdtsc64() + (1000*1000*1000) :
765 ~0ULL;
766 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
767 cpu_pause();
768 }
769 if (i_bit(event, signals)) {
770 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
771 cpu, event);
772 goto again;
773 }
774 }
775 if (event == MP_TLB_FLUSH)
776 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
777 }
778
779 /*
780 * Helper function called when busy-waiting: panic if too long
781 * a TSC-based time has elapsed since the start of the spin.
782 */
783 static boolean_t
784 mp_spin_timeout(uint64_t tsc_start)
785 {
786 uint64_t tsc_timeout;
787
788 cpu_pause();
789 if (machine_timeout_suspended())
790 return FALSE;
791
792 /*
793 * The timeout is 4 * the spinlock timeout period
794 * unless we have serial console printing (kprintf) enabled
795 * in which case we allow an even greater margin.
796 */
797 tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
798 : LockTimeOutTSC << 4;
799 return (rdtsc64() > tsc_start + tsc_timeout);
800 }
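
/*
 * Illustrative sketch (comment only, not compiled): the busy-wait idiom
 * used throughout this file.  A caller snapshots the TSC once before
 * spinning and lets mp_spin_timeout() -- which also issues cpu_pause() --
 * decide when the wait has gone on too long.  "condition" is a placeholder
 * for the caller's own exit test:
 *
 *	uint64_t tsc_spin_start = rdtsc64();
 *	while (!condition) {
 *		if (mp_spin_timeout(tsc_spin_start))
 *			panic("example: spin timed out");
 *	}
 */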
801
802 /*
803 * Helper function to take a spinlock while ensuring that incoming IPIs
804 * are still serviced if interrupts are masked while we spin.
805 * Returns current interrupt state.
806 */
807 boolean_t
808 mp_safe_spin_lock(usimple_lock_t lock)
809 {
810 if (ml_get_interrupts_enabled()) {
811 simple_lock(lock);
812 return TRUE;
813 } else {
814 uint64_t tsc_spin_start = rdtsc64();
815 while (!simple_lock_try(lock)) {
816 cpu_signal_handler(NULL);
817 if (mp_spin_timeout(tsc_spin_start)) {
818 uint32_t lock_cpu;
819 uintptr_t lowner = (uintptr_t)
820 lock->interlock.lock_data;
821 spinlock_timed_out = lock;
822 lock_cpu = spinlock_timeout_NMI(lowner);
823 NMIPI_panic(cpu_to_cpumask(lock_cpu), SPINLOCK_TIMEOUT);
824 panic("mp_safe_spin_lock() timed out, lock: %p, owner thread: 0x%lx, current_thread: %p, owner on CPU 0x%x, time: %llu",
825 lock, lowner, current_thread(), lock_cpu, mach_absolute_time());
826 }
827 }
828 return FALSE;
829 }
830 }
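
/*
 * Illustrative sketch (comment only, not compiled): typical use of
 * mp_safe_spin_lock() elsewhere in this file.  The return value is the
 * interrupt state observed on entry; callers here manage interrupts
 * separately and simply drop the lock with simple_unlock():
 *
 *	mp_safe_spin_lock(&x86_topo_lock);
 *	... touch state protected by the topo lock ...
 *	simple_unlock(&x86_topo_lock);
 */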
831
832 /*
833 * All-CPU rendezvous:
834 * - CPUs are signalled,
835 * - all execute the setup function (if specified),
836 * - rendezvous (i.e. all cpus reach a barrier),
837 * - all execute the action function (if specified),
838 * - rendezvous again,
839 * - execute the teardown function (if specified), and then
840 * - resume.
841 *
842 * Note that the supplied external functions _must_ be reentrant and aware
843 * that they are running in parallel and in an unknown lock context.
844 */
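
/*
 * Illustrative sketch (comment only, not compiled): a hypothetical caller
 * applying a change that must take effect on every CPU with interrupts
 * disabled, as is done for MTRR/PAT programming.  The callback name is
 * invented for the example; arg is opaque and passed to each CPU unchanged:
 *
 *	static void
 *	example_apply_state(void *arg)
 *	{
 *		... runs once on every CPU, interrupts masked ...
 *	}
 *
 *	mp_rendezvous_no_intrs(example_apply_state, arg);
 *
 * mp_rendezvous_no_intrs() (defined below) wraps mp_rendezvous() with
 * interrupt disable/restore; per the note above, the callback must be
 * reentrant and must not block.
 */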
845
846 static void
847 mp_rendezvous_action(__unused void *null)
848 {
849 boolean_t intrs_enabled;
850 uint64_t tsc_spin_start;
851
852 /* setup function */
853 if (mp_rv_setup_func != NULL)
854 mp_rv_setup_func(mp_rv_func_arg);
855
856 intrs_enabled = ml_get_interrupts_enabled();
857
858 /* spin on entry rendezvous */
859 atomic_incl(&mp_rv_entry, 1);
860 tsc_spin_start = rdtsc64();
861
862 while (mp_rv_entry < mp_rv_ncpus) {
863 /* poll for pesky tlb flushes if interrupts disabled */
864 if (!intrs_enabled)
865 handle_pending_TLB_flushes();
866 if (mp_spin_timeout(tsc_spin_start)) {
867 panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
868 }
869 }
870
871 /* action function */
872 if (mp_rv_action_func != NULL)
873 mp_rv_action_func(mp_rv_func_arg);
874
875 /* spin on exit rendezvous */
876 atomic_incl(&mp_rv_exit, 1);
877 tsc_spin_start = rdtsc64();
878 while (mp_rv_exit < mp_rv_ncpus) {
879 if (!intrs_enabled)
880 handle_pending_TLB_flushes();
881 if (mp_spin_timeout(tsc_spin_start))
882 panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
883 }
884
885 /* teardown function */
886 if (mp_rv_teardown_func != NULL)
887 mp_rv_teardown_func(mp_rv_func_arg);
888
889 /* Bump completion count */
890 atomic_incl(&mp_rv_complete, 1);
891 }
892
893 void
894 mp_rendezvous(void (*setup_func)(void *),
895 void (*action_func)(void *),
896 void (*teardown_func)(void *),
897 void *arg)
898 {
899 uint64_t tsc_spin_start;
900
901 if (!smp_initialized) {
902 if (setup_func != NULL)
903 setup_func(arg);
904 if (action_func != NULL)
905 action_func(arg);
906 if (teardown_func != NULL)
907 teardown_func(arg);
908 return;
909 }
910
911 /* obtain rendezvous lock */
912 (void) mp_safe_spin_lock(&mp_rv_lock);
913
914 /* set static function pointers */
915 mp_rv_setup_func = setup_func;
916 mp_rv_action_func = action_func;
917 mp_rv_teardown_func = teardown_func;
918 mp_rv_func_arg = arg;
919
920 mp_rv_entry = 0;
921 mp_rv_exit = 0;
922 mp_rv_complete = 0;
923
924 /*
925 * signal other processors, which will call mp_rendezvous_action()
926 * with interrupts disabled
927 */
928 mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;
929
930 /* call executor function on this cpu */
931 mp_rendezvous_action(NULL);
932
933 /*
934 * Spin for everyone to complete.
935 * This is necessary to ensure that all processors have proceeded
936 * from the exit barrier before we release the rendezvous structure.
937 */
938 tsc_spin_start = rdtsc64();
939 while (mp_rv_complete < mp_rv_ncpus) {
940 if (mp_spin_timeout(tsc_spin_start))
941 panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
942 }
943
944 /* Tidy up */
945 mp_rv_setup_func = NULL;
946 mp_rv_action_func = NULL;
947 mp_rv_teardown_func = NULL;
948 mp_rv_func_arg = NULL;
949
950 /* release lock */
951 simple_unlock(&mp_rv_lock);
952 }
953
954 void
955 mp_rendezvous_break_lock(void)
956 {
957 simple_lock_init(&mp_rv_lock, 0);
958 }
959
960 static void
961 setup_disable_intrs(__unused void * param_not_used)
962 {
963 /* disable interrupts before the first barrier */
964 boolean_t intr = ml_set_interrupts_enabled(FALSE);
965
966 current_cpu_datap()->cpu_iflag = intr;
967 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
968 }
969
970 static void
971 teardown_restore_intrs(__unused void * param_not_used)
972 {
973 /* restore interrupt flag following MTRR changes */
974 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
975 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
976 }
977
978 /*
979 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
980 * This is exported for use by kexts.
981 */
982 void
983 mp_rendezvous_no_intrs(
984 void (*action_func)(void *),
985 void *arg)
986 {
987 mp_rendezvous(setup_disable_intrs,
988 action_func,
989 teardown_restore_intrs,
990 arg);
991 }
992
993
994 typedef struct {
995 queue_chain_t link; /* queue linkage */
996 void (*func)(void *,void *); /* routine to call */
997 void *arg0; /* routine's 1st arg */
998 void *arg1; /* routine's 2nd arg */
999 cpumask_t *maskp; /* completion response mask */
1000 } mp_call_t;
1001
1002
1003 typedef struct {
1004 queue_head_t queue;
1005 decl_simple_lock_data(, lock);
1006 } mp_call_queue_t;
1007 #define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
1008 static mp_call_queue_t mp_cpus_call_freelist;
1009 static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
1010
1011 static inline boolean_t
1012 mp_call_head_lock(mp_call_queue_t *cqp)
1013 {
1014 boolean_t intrs_enabled;
1015
1016 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1017 simple_lock(&cqp->lock);
1018
1019 return intrs_enabled;
1020 }
1021
1022 /*
1023  * Deliver an NMIPI to a set of processors to cause them to panic.
1024 */
1025 void
1026 NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why) {
1027 unsigned int cpu, cpu_bit;
1028 uint64_t deadline;
1029
1030 NMIPI_enable(TRUE);
1031 NMI_panic_reason = why;
1032
1033 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1034 if ((cpu_mask & cpu_bit) == 0)
1035 continue;
1036 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
1037 cpu_NMI_interrupt(cpu);
1038 }
1039
1040         /* Wait (only so long) for NMI'ed cpus to respond */
1041 deadline = mach_absolute_time() + LockTimeOut;
1042 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1043 if ((cpu_mask & cpu_bit) == 0)
1044 continue;
1045 while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
1046 mach_absolute_time() < deadline) {
1047 cpu_pause();
1048 }
1049 }
1050 }
1051
1052 #if MACH_ASSERT
1053 static inline boolean_t
1054 mp_call_head_is_locked(mp_call_queue_t *cqp)
1055 {
1056 return !ml_get_interrupts_enabled() &&
1057 hw_lock_held((hw_lock_t)&cqp->lock);
1058 }
1059 #endif
1060
1061 static inline void
1062 mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
1063 {
1064 simple_unlock(&cqp->lock);
1065 ml_set_interrupts_enabled(intrs_enabled);
1066 }
1067
1068 static inline mp_call_t *
1069 mp_call_alloc(void)
1070 {
1071 mp_call_t *callp = NULL;
1072 boolean_t intrs_enabled;
1073 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1074
1075 intrs_enabled = mp_call_head_lock(cqp);
1076 if (!queue_empty(&cqp->queue))
1077 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1078 mp_call_head_unlock(cqp, intrs_enabled);
1079
1080 return callp;
1081 }
1082
1083 static inline void
1084 mp_call_free(mp_call_t *callp)
1085 {
1086 boolean_t intrs_enabled;
1087 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1088
1089 intrs_enabled = mp_call_head_lock(cqp);
1090 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
1091 mp_call_head_unlock(cqp, intrs_enabled);
1092 }
1093
1094 static inline mp_call_t *
1095 mp_call_dequeue_locked(mp_call_queue_t *cqp)
1096 {
1097 mp_call_t *callp = NULL;
1098
1099 assert(mp_call_head_is_locked(cqp));
1100 if (!queue_empty(&cqp->queue))
1101 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1102 return callp;
1103 }
1104
1105 static inline void
1106 mp_call_enqueue_locked(
1107 mp_call_queue_t *cqp,
1108 mp_call_t *callp)
1109 {
1110 queue_enter(&cqp->queue, callp, typeof(callp), link);
1111 }
1112
1113 /* Called on the boot processor to initialize global structures */
1114 static void
1115 mp_cpus_call_init(void)
1116 {
1117 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1118
1119 DBG("mp_cpus_call_init()\n");
1120 simple_lock_init(&cqp->lock, 0);
1121 queue_init(&cqp->queue);
1122 }
1123
1124 /*
1125 * Called at processor registration to add call buffers to the free list
1126 * and to initialize the per-cpu call queue.
1127 */
1128 void
1129 mp_cpus_call_cpu_init(int cpu)
1130 {
1131 int i;
1132 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1133 mp_call_t *callp;
1134
1135 simple_lock_init(&cqp->lock, 0);
1136 queue_init(&cqp->queue);
1137 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1138 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
1139 mp_call_free(callp);
1140 }
1141
1142 DBG("mp_cpus_call_init(%d) done\n", cpu);
1143 }
1144
1145 /*
1146 * This is called from cpu_signal_handler() to process an MP_CALL signal.
1147  * It is also called from i386_deactivate_cpu() when a cpu is being taken offline.
1148 */
1149 static void
1150 mp_cpus_call_action(void)
1151 {
1152 mp_call_queue_t *cqp;
1153 boolean_t intrs_enabled;
1154 mp_call_t *callp;
1155 mp_call_t call;
1156
1157 assert(!ml_get_interrupts_enabled());
1158 cqp = &mp_cpus_call_head[cpu_number()];
1159 intrs_enabled = mp_call_head_lock(cqp);
1160 while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
1161 /* Copy call request to the stack to free buffer */
1162 call = *callp;
1163 mp_call_free(callp);
1164 if (call.func != NULL) {
1165 mp_call_head_unlock(cqp, intrs_enabled);
1166 KERNEL_DEBUG_CONSTANT(
1167 TRACE_MP_CPUS_CALL_ACTION,
1168 VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
1169 VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
1170 call.func(call.arg0, call.arg1);
1171 (void) mp_call_head_lock(cqp);
1172 }
1173 if (call.maskp != NULL)
1174 i_bit_set(cpu_number(), call.maskp);
1175 }
1176 mp_call_head_unlock(cqp, intrs_enabled);
1177 }
1178
1179 /*
1180 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1181 * Possible modes are:
1182 * SYNC: function is called serially on target cpus in logical cpu order
1183 * waiting for each call to be acknowledged before proceeding
1184 * ASYNC: function call is queued to the specified cpus
1185 * waiting for all calls to complete in parallel before returning
1186 * NOSYNC: function calls are queued
1187 * but we return before confirmation of calls completing.
1188 * The action function may be NULL.
1189 * The cpu mask may include the local cpu. Offline cpus are ignored.
1190 * The return value is the number of cpus on which the call was made or queued.
1191 */
1192 cpu_t
1193 mp_cpus_call(
1194 cpumask_t cpus,
1195 mp_sync_t mode,
1196 void (*action_func)(void *),
1197 void *arg)
1198 {
1199 return mp_cpus_call1(
1200 cpus,
1201 mode,
1202 (void (*)(void *,void *))action_func,
1203 arg,
1204 NULL,
1205 NULL);
1206 }
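
/*
 * Illustrative sketch (comment only, not compiled): a hypothetical
 * synchronous cross-call to all other CPUs.  The callback name is invented
 * for the example; the return value is the number of CPUs reached:
 *
 *	static void
 *	example_cross_call(void *arg)
 *	{
 *		... runs on each target CPU from its IPI handler ...
 *	}
 *
 *	cpu_t n = mp_cpus_call(CPUMASK_OTHERS, SYNC, example_cross_call, arg);
 *
 * With ASYNC the queued calls run in parallel and mp_cpus_call() waits for
 * all of them to complete; with NOSYNC it returns once they are queued.
 */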
1207
1208 static void
1209 mp_cpus_call_wait(boolean_t intrs_enabled,
1210 cpumask_t cpus_called,
1211 cpumask_t *cpus_responded)
1212 {
1213 mp_call_queue_t *cqp;
1214 uint64_t tsc_spin_start;
1215
1216 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
1217 cqp = &mp_cpus_call_head[cpu_number()];
1218
1219 tsc_spin_start = rdtsc64();
1220 while (*cpus_responded != cpus_called) {
1221 if (!intrs_enabled) {
1222 /* Sniffing w/o locking */
1223 if (!queue_empty(&cqp->queue))
1224 mp_cpus_call_action();
1225 cpu_signal_handler(NULL);
1226 }
1227 if (mp_spin_timeout(tsc_spin_start)) {
1228 cpumask_t cpus_unresponsive;
1229
1230 cpus_unresponsive = cpus_called & ~(*cpus_responded);
1231 NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
1232 panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
1233 cpus_unresponsive);
1234 }
1235 }
1236 }
1237
1238 cpu_t
1239 mp_cpus_call1(
1240 cpumask_t cpus,
1241 mp_sync_t mode,
1242 void (*action_func)(void *, void *),
1243 void *arg0,
1244 void *arg1,
1245 cpumask_t *cpus_calledp)
1246 {
1247 cpu_t cpu = 0;
1248 boolean_t intrs_enabled = FALSE;
1249 boolean_t call_self = FALSE;
1250 cpumask_t cpus_called = 0;
1251 cpumask_t cpus_responded = 0;
1252 long cpus_call_count = 0;
1253 uint64_t tsc_spin_start;
1254 boolean_t topo_lock;
1255
1256 KERNEL_DEBUG_CONSTANT(
1257 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
1258 cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
1259
1260 if (!smp_initialized) {
1261 if ((cpus & CPUMASK_SELF) == 0)
1262 goto out;
1263 if (action_func != NULL) {
1264 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1265 action_func(arg0, arg1);
1266 ml_set_interrupts_enabled(intrs_enabled);
1267 }
1268 call_self = TRUE;
1269 goto out;
1270 }
1271
1272 /*
1273 * Queue the call for each non-local requested cpu.
1274 * This is performed under the topo lock to prevent changes to
1275 * cpus online state and to prevent concurrent rendezvouses --
1276 * although an exception is made if we're calling only the master
1277 * processor since that always remains active. Note: this exception
1278 * is expected for longterm timer nosync cross-calls to the master cpu.
1279 */
1280 mp_disable_preemption();
1281 intrs_enabled = ml_get_interrupts_enabled();
1282 topo_lock = (cpus != cpu_to_cpumask(master_cpu));
1283 if (topo_lock) {
1284 ml_set_interrupts_enabled(FALSE);
1285 (void) mp_safe_spin_lock(&x86_topo_lock);
1286 }
1287 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1288 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1289 !cpu_is_running(cpu))
1290 continue;
1291 tsc_spin_start = rdtsc64();
1292 if (cpu == (cpu_t) cpu_number()) {
1293 /*
1294 * We don't IPI ourself and if calling asynchronously,
1295 * we defer our call until we have signalled all others.
1296 */
1297 call_self = TRUE;
1298 if (mode == SYNC && action_func != NULL) {
1299 KERNEL_DEBUG_CONSTANT(
1300 TRACE_MP_CPUS_CALL_LOCAL,
1301 VM_KERNEL_UNSLIDE(action_func),
1302 VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
1303 action_func(arg0, arg1);
1304 }
1305 } else {
1306 /*
1307 * Here to queue a call to cpu and IPI.
1308 */
1309 mp_call_t *callp = NULL;
1310 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1311 boolean_t intrs_inner;
1312
1313 queue_call:
1314 if (callp == NULL)
1315 callp = mp_call_alloc();
1316 intrs_inner = mp_call_head_lock(cqp);
1317 if (callp == NULL) {
1318 mp_call_head_unlock(cqp, intrs_inner);
1319 KERNEL_DEBUG_CONSTANT(
1320 TRACE_MP_CPUS_CALL_NOBUF,
1321 cpu, 0, 0, 0, 0);
1322 if (!intrs_inner) {
1323 /* Sniffing w/o locking */
1324 if (!queue_empty(&cqp->queue))
1325 mp_cpus_call_action();
1326 handle_pending_TLB_flushes();
1327 }
1328 if (mp_spin_timeout(tsc_spin_start))
1329 panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
1330 tsc_spin_start, rdtsc64());
1331 goto queue_call;
1332 }
1333 callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
1334 callp->func = action_func;
1335 callp->arg0 = arg0;
1336 callp->arg1 = arg1;
1337 mp_call_enqueue_locked(cqp, callp);
1338 cpus_call_count++;
1339 cpus_called |= cpu_to_cpumask(cpu);
1340 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1341 mp_call_head_unlock(cqp, intrs_inner);
1342 if (mode == SYNC) {
1343 mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
1344 }
1345 }
1346 }
1347 if (topo_lock) {
1348 simple_unlock(&x86_topo_lock);
1349 ml_set_interrupts_enabled(intrs_enabled);
1350 }
1351
1352 /* Call locally if mode not SYNC */
1353 if (mode != SYNC && call_self ) {
1354 KERNEL_DEBUG_CONSTANT(
1355 TRACE_MP_CPUS_CALL_LOCAL,
1356 VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
1357 if (action_func != NULL) {
1358 ml_set_interrupts_enabled(FALSE);
1359 action_func(arg0, arg1);
1360 ml_set_interrupts_enabled(intrs_enabled);
1361 }
1362 }
1363
1364 /* For ASYNC, now wait for all signaled cpus to complete their calls */
1365 if (mode == ASYNC)
1366 mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
1367
1368 /* Safe to allow pre-emption now */
1369 mp_enable_preemption();
1370
1371 out:
1372 if (call_self){
1373 cpus_called |= cpu_to_cpumask(cpu);
1374 cpus_call_count++;
1375 }
1376
1377 if (cpus_calledp)
1378 *cpus_calledp = cpus_called;
1379
1380 KERNEL_DEBUG_CONSTANT(
1381 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
1382 cpus_call_count, cpus_called, 0, 0, 0);
1383
1384 return (cpu_t) cpus_call_count;
1385 }
1386
1387
1388 static void
1389 mp_broadcast_action(__unused void *null)
1390 {
1391 /* call action function */
1392 if (mp_bc_action_func != NULL)
1393 mp_bc_action_func(mp_bc_func_arg);
1394
1395 /* if we're the last one through, wake up the instigator */
1396 if (atomic_decl_and_test(&mp_bc_count, 1))
1397 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1398 }
1399
1400 /*
1401 * mp_broadcast() runs a given function on all active cpus.
1402  * The caller blocks until the function has run on all cpus.
1403  * The caller will also block if there is another pending broadcast.
1404 */
1405 void
1406 mp_broadcast(
1407 void (*action_func)(void *),
1408 void *arg)
1409 {
1410 if (!smp_initialized) {
1411 if (action_func != NULL)
1412 action_func(arg);
1413 return;
1414 }
1415
1416 /* obtain broadcast lock */
1417 lck_mtx_lock(&mp_bc_lock);
1418
1419 /* set static function pointers */
1420 mp_bc_action_func = action_func;
1421 mp_bc_func_arg = arg;
1422
1423 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
1424
1425 /*
1426 * signal other processors, which will call mp_broadcast_action()
1427 */
1428 mp_bc_count = real_ncpus; /* assume max possible active */
1429 mp_bc_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, *mp_broadcast_action, NULL) + 1;
1430 atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus); /* subtract inactive */
1431
1432 /* call executor function on this cpu */
1433 mp_broadcast_action(NULL);
1434
1435 /* block for other cpus to have run action_func */
1436 if (mp_bc_ncpus > 1)
1437 thread_block(THREAD_CONTINUE_NULL);
1438 else
1439 clear_wait(current_thread(), THREAD_AWAKENED);
1440
1441 /* release lock */
1442 lck_mtx_unlock(&mp_bc_lock);
1443 }
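
/*
 * Illustrative sketch (comment only, not compiled): a hypothetical blocking
 * broadcast.  The callback name is invented for the example; mp_broadcast()
 * returns only after the function has run on every active cpu, including
 * the caller's.  Because it can block, it is not suitable for interrupt
 * context or for callers holding spinlocks:
 *
 *	static void
 *	example_per_cpu_work(void *arg)
 *	{
 *		... runs once on each active CPU ...
 *	}
 *
 *	mp_broadcast(example_per_cpu_work, arg);
 */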
1444
1445 void
1446 mp_cpus_kick(cpumask_t cpus)
1447 {
1448 cpu_t cpu;
1449 boolean_t intrs_enabled = FALSE;
1450
1451 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1452 mp_safe_spin_lock(&x86_topo_lock);
1453
1454 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1455 if ((cpu == (cpu_t) cpu_number())
1456 || ((cpu_to_cpumask(cpu) & cpus) == 0)
1457 || !cpu_is_running(cpu))
1458 {
1459 continue;
1460 }
1461
1462 lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
1463 }
1464
1465 simple_unlock(&x86_topo_lock);
1466 ml_set_interrupts_enabled(intrs_enabled);
1467 }
1468
1469 void
1470 i386_activate_cpu(void)
1471 {
1472 cpu_data_t *cdp = current_cpu_datap();
1473
1474 assert(!ml_get_interrupts_enabled());
1475
1476 if (!smp_initialized) {
1477 cdp->cpu_running = TRUE;
1478 return;
1479 }
1480
1481 mp_safe_spin_lock(&x86_topo_lock);
1482 cdp->cpu_running = TRUE;
1483 started_cpu();
1484 simple_unlock(&x86_topo_lock);
1485 flush_tlb_raw();
1486 }
1487
1488 void
1489 i386_deactivate_cpu(void)
1490 {
1491 cpu_data_t *cdp = current_cpu_datap();
1492
1493 assert(!ml_get_interrupts_enabled());
1494
1495 KERNEL_DEBUG_CONSTANT(
1496 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
1497 0, 0, 0, 0, 0);
1498
1499 mp_safe_spin_lock(&x86_topo_lock);
1500 cdp->cpu_running = FALSE;
1501 simple_unlock(&x86_topo_lock);
1502
1503 /*
1504 * Move all of this cpu's timers to the master/boot cpu,
1505 * and poke it in case there's a sooner deadline for it to schedule.
1506 */
1507 timer_queue_shutdown(&cdp->rtclock_timer.queue);
1508 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
1509
1510 #if MONOTONIC
1511 mt_cpu_down(cdp);
1512 #endif /* MONOTONIC */
1513
1514 /*
1515 * Open an interrupt window
1516 * and ensure any pending IPI or timer is serviced
1517 */
1518 mp_disable_preemption();
1519 ml_set_interrupts_enabled(TRUE);
1520
1521 while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
1522 cpu_pause();
1523 /*
1524 * Ensure there's no remaining timer deadline set
1525 * - AICPM may have left one active.
1526 */
1527 setPop(0);
1528
1529 ml_set_interrupts_enabled(FALSE);
1530 mp_enable_preemption();
1531
1532 KERNEL_DEBUG_CONSTANT(
1533 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
1534 0, 0, 0, 0, 0);
1535 }
1536
1537 int pmsafe_debug = 1;
1538
1539 #if MACH_KDP
1540 volatile boolean_t mp_kdp_trap = FALSE;
1541 volatile boolean_t mp_kdp_is_NMI = FALSE;
1542 volatile unsigned long mp_kdp_ncpus;
1543 boolean_t mp_kdp_state;
1544
1545
1546 void
1547 mp_kdp_enter(boolean_t proceed_on_failure)
1548 {
1549 unsigned int cpu;
1550 unsigned int ncpus = 0;
1551 unsigned int my_cpu;
1552 uint64_t tsc_timeout;
1553
1554 DBG("mp_kdp_enter()\n");
1555
1556 /*
1557 * Here to enter the debugger.
1558 * In case of races, only one cpu is allowed to enter kdp after
1559 * stopping others.
1560 */
1561 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1562 my_cpu = cpu_number();
1563
1564 if (my_cpu == (unsigned) debugger_cpu) {
1565 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1566 kdp_reset();
1567 return;
1568 }
1569
1570 uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1571 int locked = 0;
1572 while (!locked || mp_kdp_trap) {
1573 if (locked) {
1574 simple_unlock(&x86_topo_lock);
1575 }
1576 if (proceed_on_failure) {
1577 if (mach_absolute_time() - start_time > 500000000ll) {
1578 kprintf("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
1579 break;
1580 }
1581 locked = simple_lock_try(&x86_topo_lock);
1582 if (!locked) {
1583 cpu_pause();
1584 }
1585 } else {
1586 mp_safe_spin_lock(&x86_topo_lock);
1587 locked = TRUE;
1588 }
1589
1590 if (locked && mp_kdp_trap) {
1591 simple_unlock(&x86_topo_lock);
1592 DBG("mp_kdp_enter() race lost\n");
1593 #if MACH_KDP
1594 mp_kdp_wait(TRUE, FALSE);
1595 #endif
1596 locked = FALSE;
1597 }
1598 }
1599
1600 if (pmsafe_debug && !kdp_snapshot)
1601 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1602
1603 debugger_cpu = my_cpu;
1604 ncpus = 1;
1605 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1606 mp_kdp_trap = TRUE;
1607 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1608
1609 /*
1610 * Deliver a nudge to other cpus, counting how many
1611 */
1612 DBG("mp_kdp_enter() signaling other processors\n");
1613 if (force_immediate_debugger_NMI == FALSE) {
1614 for (cpu = 0; cpu < real_ncpus; cpu++) {
1615 if (cpu == my_cpu || !cpu_is_running(cpu))
1616 continue;
1617 ncpus++;
1618 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1619 }
1620 /*
1621  * Wait for other processors to synchronize
1622 */
1623 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1624
1625 /*
1626 * This timeout is rather arbitrary; we don't want to NMI
1627 * processors that are executing at potentially
1628 * "unsafe-to-interrupt" points such as the trampolines,
1629 * but neither do we want to lose state by waiting too long.
1630 */
1631 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1632
1633 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1634 /*
1635 * A TLB shootdown request may be pending--this would
1636 * result in the requesting processor waiting in
1637 * PMAP_UPDATE_TLBS() until this processor deals with it.
1638 * Process it, so it can now enter mp_kdp_wait()
1639 */
1640 handle_pending_TLB_flushes();
1641 cpu_pause();
1642 }
1643 /* If we've timed out, and some processor(s) are still unresponsive,
1644 * interrupt them with an NMI via the local APIC, iff a panic is
1645 * in progress.
1646 */
1647 if (panic_active()) {
1648 NMIPI_enable(TRUE);
1649 }
1650 if (mp_kdp_ncpus != ncpus) {
1651 cpumask_t cpus_NMI_pending = 0;
1652 DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
1653 for (cpu = 0; cpu < real_ncpus; cpu++) {
1654 if (cpu == my_cpu || !cpu_is_running(cpu))
1655 continue;
1656 if (cpu_signal_pending(cpu, MP_KDP)) {
1657 cpus_NMI_pending |= cpu_to_cpumask(cpu);
1658 cpu_NMI_interrupt(cpu);
1659 }
1660 }
1661 /* Wait again for the same timeout */
1662 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1663 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1664 handle_pending_TLB_flushes();
1665 cpu_pause();
1666 }
1667 if (mp_kdp_ncpus != ncpus) {
1668 kdb_printf("mp_kdp_enter(): %llu, %lu, %u TIMED-OUT WAITING FOR NMI-ACK, PROCEEDING\n", cpus_NMI_pending, mp_kdp_ncpus, ncpus);
1669 }
1670 }
1671 }
1672 else
1673 for (cpu = 0; cpu < real_ncpus; cpu++) {
1674 if (cpu == my_cpu || !cpu_is_running(cpu))
1675 continue;
1676 cpu_NMI_interrupt(cpu);
1677 }
1678
1679 if (locked) {
1680 simple_unlock(&x86_topo_lock);
1681 }
1682
1683 DBG("mp_kdp_enter() %d processors done %s\n",
1684 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1685
1686 postcode(MP_KDP_ENTER);
1687 }
1688
1689 static boolean_t
1690 cpu_signal_pending(int cpu, mp_event_t event)
1691 {
1692 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1693 boolean_t retval = FALSE;
1694
1695 if (i_bit(event, signals))
1696 retval = TRUE;
1697 return retval;
1698 }
1699
1700 long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1701 void *arg0, void *arg1)
1702 {
1703 if (lcpu > (real_ncpus - 1))
1704 return -1;
1705
1706 if (func == NULL)
1707 return -1;
1708
1709 kdp_xcpu_call_func.func = func;
1710 kdp_xcpu_call_func.ret = -1;
1711 kdp_xcpu_call_func.arg0 = arg0;
1712 kdp_xcpu_call_func.arg1 = arg1;
1713 kdp_xcpu_call_func.cpu = lcpu;
1714 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1715 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1716 cpu_pause();
1717 return kdp_xcpu_call_func.ret;
1718 }
1719
1720 static void
1721 kdp_x86_xcpu_poll(void)
1722 {
1723 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1724 kdp_xcpu_call_func.ret =
1725 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1726 kdp_xcpu_call_func.arg1,
1727 cpu_number());
1728 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1729 }
1730 }
1731
1732 static void
1733 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
1734 {
1735 DBG("mp_kdp_wait()\n");
1736
1737 current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
1738 #if CONFIG_MCA
1739 /* If we've trapped due to a machine-check, save MCA registers */
1740 mca_check_save();
1741 #endif
1742
1743 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1744 while (mp_kdp_trap || (isNMI == TRUE)) {
1745 /*
1746 * A TLB shootdown request may be pending--this would result
1747 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1748 * until this processor handles it.
1749 * Process it, so it can now enter mp_kdp_wait()
1750 */
1751 if (flush)
1752 handle_pending_TLB_flushes();
1753
1754 kdp_x86_xcpu_poll();
1755 cpu_pause();
1756 }
1757
1758 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1759 DBG("mp_kdp_wait() done\n");
1760 }
1761
1762 void
1763 mp_kdp_exit(void)
1764 {
1765 DBG("mp_kdp_exit()\n");
1766 debugger_cpu = -1;
1767 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1768
1769 debugger_exit_time = mach_absolute_time();
1770
1771 mp_kdp_trap = FALSE;
1772 mfence();
1773
1774         /* Wait for other processors to stop spinning. XXX needs timeout */
1775 DBG("mp_kdp_exit() waiting for processors to resume\n");
1776 while (mp_kdp_ncpus > 0) {
1777 /*
1778 * a TLB shootdown request may be pending... this would result in the requesting
1779 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1780 * Process it, so it can now enter mp_kdp_wait()
1781 */
1782 handle_pending_TLB_flushes();
1783
1784 cpu_pause();
1785 }
1786
1787 if (pmsafe_debug && !kdp_snapshot)
1788 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1789
1790 debugger_exit_time = mach_absolute_time();
1791
1792 DBG("mp_kdp_exit() done\n");
1793 (void) ml_set_interrupts_enabled(mp_kdp_state);
1794 postcode(MP_KDP_EXIT);
1795 }
1796
1797 #endif /* MACH_KDP */
1798
1799 boolean_t
1800 mp_recent_debugger_activity(void) {
1801 uint64_t abstime = mach_absolute_time();
1802 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1803 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
1804 }
1805
1806 /*ARGSUSED*/
1807 void
1808 init_ast_check(
1809 __unused processor_t processor)
1810 {
1811 }
1812
1813 void
1814 cause_ast_check(
1815 processor_t processor)
1816 {
1817 int cpu = processor->cpu_id;
1818
1819 if (cpu != cpu_number()) {
1820 i386_signal_cpu(cpu, MP_AST, ASYNC);
1821 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1822 }
1823 }
1824
1825 void
1826 slave_machine_init(void *param)
1827 {
1828 /*
1829 * Here in process context, but with interrupts disabled.
1830 */
1831 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1832
1833 if (param == FULL_SLAVE_INIT) {
1834 /*
1835 * Cold start
1836 */
1837 clock_init();
1838 }
1839 cpu_machine_init(); /* Interrupts enabled hereafter */
1840 }
1841
1842 #undef cpu_number
1843 int cpu_number(void)
1844 {
1845 return get_cpu_number();
1846 }
1847
1848 static void
1849 cpu_prewarm_init()
1850 {
1851 int i;
1852
1853 simple_lock_init(&cpu_warm_lock, 0);
1854 queue_init(&cpu_warm_call_list);
1855 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1856 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1857 }
1858 }
1859
1860 static timer_call_t
1861 grab_warm_timer_call()
1862 {
1863 spl_t x;
1864 timer_call_t call = NULL;
1865
1866 x = splsched();
1867 simple_lock(&cpu_warm_lock);
1868 if (!queue_empty(&cpu_warm_call_list)) {
1869 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1870 }
1871 simple_unlock(&cpu_warm_lock);
1872 splx(x);
1873
1874 return call;
1875 }
1876
1877 static void
1878 free_warm_timer_call(timer_call_t call)
1879 {
1880 spl_t x;
1881
1882 x = splsched();
1883 simple_lock(&cpu_warm_lock);
1884 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1885 simple_unlock(&cpu_warm_lock);
1886 splx(x);
1887 }
1888
1889 /*
1890 * Runs in timer call context (interrupts disabled).
1891 */
1892 static void
1893 cpu_warm_timer_call_func(
1894 call_entry_param_t p0,
1895 __unused call_entry_param_t p1)
1896 {
1897 free_warm_timer_call((timer_call_t)p0);
1898 return;
1899 }
1900
1901 /*
1902 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1903 */
1904 static void
1905 _cpu_warm_setup(
1906 void *arg)
1907 {
1908 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1909
1910 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
1911 cwdp->cwd_result = 0;
1912
1913 return;
1914 }
1915
1916 /*
1917 * Not safe to call with interrupts disabled.
1918 */
1919 kern_return_t
1920 ml_interrupt_prewarm(
1921 uint64_t deadline)
1922 {
1923 struct cpu_warm_data cwd;
1924 timer_call_t call;
1925 cpu_t ct;
1926
1927 if (ml_get_interrupts_enabled() == FALSE) {
1928 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1929 }
1930
1931 /*
1932 * If the platform doesn't need our help, say that we succeeded.
1933 */
1934 if (!ml_get_interrupt_prewake_applicable()) {
1935 return KERN_SUCCESS;
1936 }
1937
1938 /*
1939 * Grab a timer call to use.
1940 */
1941 call = grab_warm_timer_call();
1942 if (call == NULL) {
1943 return KERN_RESOURCE_SHORTAGE;
1944 }
1945
1946 timer_call_setup(call, cpu_warm_timer_call_func, call);
1947 cwd.cwd_call = call;
1948 cwd.cwd_deadline = deadline;
1949 cwd.cwd_result = 0;
1950
1951 /*
1952 * For now, non-local interrupts happen on the master processor.
1953 */
1954 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1955 if (ct == 0) {
1956 free_warm_timer_call(call);
1957 return KERN_FAILURE;
1958 } else {
1959 return cwd.cwd_result;
1960 }
1961 }
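
/*
 * Illustrative sketch (comment only, not compiled): a hypothetical caller
 * hinting that an interrupt is expected at a known absolute deadline, for
 * example before arming a device timer.  "delta" is a placeholder for the
 * caller's own interval; prewarming is best-effort, so failure is normally
 * not fatal:
 *
 *	uint64_t deadline = mach_absolute_time() + delta;
 *	(void) ml_interrupt_prewarm(deadline);
 */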
1962
1963 #if DEBUG || DEVELOPMENT
1964 void
1965 kernel_spin(uint64_t spin_ns)
1966 {
1967 boolean_t istate;
1968 uint64_t spin_abs;
1969 uint64_t deadline;
1970 cpu_data_t *cdp;
1971
1972 kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
1973 istate = ml_set_interrupts_enabled(FALSE);
1974 cdp = current_cpu_datap();
1975 nanoseconds_to_absolutetime(spin_ns, &spin_abs);
1976
1977 /* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
1978 cdp->cpu_int_event_time = mach_absolute_time();
1979 cdp->cpu_int_state = (void *) USER_STATE(current_thread());
1980
1981 deadline = mach_absolute_time() + spin_ns;
1982 while (mach_absolute_time() < deadline)
1983 cpu_pause();
1984
1985 cdp->cpu_int_event_time = 0;
1986 cdp->cpu_int_state = NULL;
1987
1988 ml_set_interrupts_enabled(istate);
1989 kprintf("kernel_spin() continuing\n");
1990 }
1991
1992 /*
1993 * Called from the scheduler's maintenance thread,
1994 * scan running processors for long-running ISRs and:
1995 * - panic if longer than LockTimeOut, or
1996 * - log if more than a quantum.
1997 */
1998 void
1999 mp_interrupt_watchdog(void)
2000 {
2001 cpu_t cpu;
2002 boolean_t intrs_enabled = FALSE;
2003 uint16_t cpu_int_num;
2004 uint64_t cpu_int_event_time;
2005 uint64_t cpu_rip;
2006 uint64_t cpu_int_duration;
2007 uint64_t now;
2008 x86_saved_state_t *cpu_int_state;
2009
2010 if (__improbable(!mp_interrupt_watchdog_enabled))
2011 return;
2012
2013 intrs_enabled = ml_set_interrupts_enabled(FALSE);
2014 now = mach_absolute_time();
2015 /*
2016 * While timeouts are not suspended,
2017 * check all other processors for long outstanding interrupt handling.
2018 */
2019 for (cpu = 0;
2020 cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
2021 cpu++) {
2022 if ((cpu == (cpu_t) cpu_number()) ||
2023 (!cpu_is_running(cpu)))
2024 continue;
2025 cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
2026 if (cpu_int_event_time == 0)
2027 continue;
2028 if (__improbable(now < cpu_int_event_time))
2029 continue; /* skip due to inter-processor skew */
2030 cpu_int_state = cpu_datap(cpu)->cpu_int_state;
2031 if (__improbable(cpu_int_state == NULL))
2032 /* The interrupt may have been dismissed */
2033 continue;
2034
2035 /* Here with a cpu handling an interrupt */
2036
2037 cpu_int_duration = now - cpu_int_event_time;
2038 if (__improbable(cpu_int_duration > LockTimeOut)) {
2039 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2040 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2041 vector_timed_out = cpu_int_num;
2042 NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
2043 panic("Interrupt watchdog, "
2044 "cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
2045 cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
2046 /* NOT REACHED */
2047 } else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
2048 mp_interrupt_watchdog_events++;
2049 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2050 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2051 ml_set_interrupts_enabled(intrs_enabled);
2052 printf("Interrupt watchdog, "
2053 "cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
2054 cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
2055 return;
2056 }
2057 }
2058
2059 ml_set_interrupts_enabled(intrs_enabled);
2060 }
2061 #endif