1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31
32 #include <mach_kdp.h>
33 #include <kdp/kdp_internal.h>
34 #include <mach_ldebug.h>
35 #include <gprof.h>
36
37 #include <mach/mach_types.h>
38 #include <mach/kern_return.h>
39
40 #include <kern/kern_types.h>
41 #include <kern/startup.h>
42 #include <kern/timer_queue.h>
43 #include <kern/processor.h>
44 #include <kern/cpu_number.h>
45 #include <kern/cpu_data.h>
46 #include <kern/assert.h>
47 #include <kern/machine.h>
48 #include <kern/pms.h>
49 #include <kern/misc_protos.h>
50 #include <kern/timer_call.h>
51 #include <kern/kalloc.h>
52 #include <kern/queue.h>
53 #include <prng/random.h>
54
55 #include <vm/vm_map.h>
56 #include <vm/vm_kern.h>
57
58 #include <profiling/profile-mk.h>
59
60 #include <i386/bit_routines.h>
61 #include <i386/proc_reg.h>
62 #include <i386/cpu_threads.h>
63 #include <i386/mp_desc.h>
64 #include <i386/misc_protos.h>
65 #include <i386/trap.h>
66 #include <i386/postcode.h>
67 #include <i386/machine_routines.h>
68 #include <i386/mp.h>
69 #include <i386/mp_events.h>
70 #include <i386/lapic.h>
71 #include <i386/cpuid.h>
72 #include <i386/fpu.h>
73 #include <i386/machine_cpu.h>
74 #include <i386/pmCPU.h>
75 #if CONFIG_MCA
76 #include <i386/machine_check.h>
77 #endif
78 #include <i386/acpi.h>
79
80 #include <chud/chud_xnu.h>
81 #include <chud/chud_xnu_private.h>
82
83 #include <sys/kdebug.h>
84
85 #include <console/serial_protos.h>
86
87 #if MONOTONIC
88 #include <kern/monotonic.h>
89 #endif /* MONOTONIC */
90
91 #if MP_DEBUG
92 #define PAUSE delay(1000000)
93 #define DBG(x...) kprintf(x)
94 #else
95 #define DBG(x...)
96 #define PAUSE
97 #endif /* MP_DEBUG */
98
99 /* Debugging/test trace events: */
100 #define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
101 #define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
102 #define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
103 #define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
104 #define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
105 #define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
106 #define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
107 #define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
108
109 #define ABS(v) (((v) > 0)?(v):-(v))
110
111 void slave_boot_init(void);
112 void i386_cpu_IPI(int cpu);
113
114 #if MACH_KDP
115 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
116 #endif /* MACH_KDP */
117
118 #if MACH_KDP
119 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
120 #endif /* MACH_KDP */
121 static int NMIInterruptHandler(x86_saved_state_t *regs);
122
123 boolean_t smp_initialized = FALSE;
124 uint32_t TSC_sync_margin = 0xFFF;
125 volatile boolean_t force_immediate_debugger_NMI = FALSE;
126 volatile boolean_t pmap_tlb_flush_timeout = FALSE;
127 #if DEBUG || DEVELOPMENT
128 boolean_t mp_interrupt_watchdog_enabled = TRUE;
129 uint32_t mp_interrupt_watchdog_events = 0;
130 #endif
131
132 decl_simple_lock_data(,debugger_callback_lock);
133 struct debugger_callback *debugger_callback = NULL;
134
135 decl_lck_mtx_data(static, mp_cpu_boot_lock);
136 lck_mtx_ext_t mp_cpu_boot_lock_ext;
137
138 /* Variables needed for MP rendezvous. */
139 decl_simple_lock_data(,mp_rv_lock);
140 static void (*mp_rv_setup_func)(void *arg);
141 static void (*mp_rv_action_func)(void *arg);
142 static void (*mp_rv_teardown_func)(void *arg);
143 static void *mp_rv_func_arg;
144 static volatile int mp_rv_ncpus;
145 /* Cache-aligned barriers: */
146 static volatile long mp_rv_entry __attribute__((aligned(64)));
147 static volatile long mp_rv_exit __attribute__((aligned(64)));
148 static volatile long mp_rv_complete __attribute__((aligned(64)));
149
150 volatile uint64_t debugger_entry_time;
151 volatile uint64_t debugger_exit_time;
152 #if MACH_KDP
153 #include <kdp/kdp.h>
154 extern int kdp_snapshot;
155 static struct _kdp_xcpu_call_func {
156 kdp_x86_xcpu_func_t func;
157 void *arg0, *arg1;
158 volatile long ret;
159 volatile uint16_t cpu;
160 } kdp_xcpu_call_func = {
161 .cpu = KDP_XCPU_NONE
162 };
163
164 #endif
165
166 /* Variables needed for MP broadcast. */
167 static void (*mp_bc_action_func)(void *arg);
168 static void *mp_bc_func_arg;
169 static int mp_bc_ncpus;
170 static volatile long mp_bc_count;
171 decl_lck_mtx_data(static, mp_bc_lock);
172 lck_mtx_ext_t mp_bc_lock_ext;
173 static volatile int debugger_cpu = -1;
174 volatile long NMIPI_acks = 0;
175 volatile long NMI_count = 0;
176 static NMI_reason_t NMI_panic_reason = NONE;
177 static int vector_timed_out;
178
179 extern void NMI_cpus(void);
180
181 static void mp_cpus_call_init(void);
182 static void mp_cpus_call_action(void);
183 static void mp_call_PM(void);
184
185 char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
186
187 /* PAL-related routines */
188 boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
189 int ipi_vector, i386_intr_func_t ipi_handler);
190 void i386_start_cpu(int lapic_id, int cpu_num);
191 void i386_send_NMI(int cpu);
192 void NMIPI_enable(boolean_t);
193 #if GPROF
194 /*
195 * Initialize dummy structs for profiling. These aren't used but
196 * allow hertz_tick() to be built with GPROF defined.
197 */
198 struct profile_vars _profile_vars;
199 struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
200 #define GPROF_INIT() \
201 { \
202 int i; \
203 \
204 /* Hack to initialize pointers to unused profiling structs */ \
205 for (i = 1; i < MAX_CPUS; i++) \
206 _profile_vars_cpus[i] = &_profile_vars; \
207 }
208 #else
209 #define GPROF_INIT()
210 #endif /* GPROF */
211
212 static lck_grp_t smp_lck_grp;
213 static lck_grp_attr_t smp_lck_grp_attr;
214
215 #define NUM_CPU_WARM_CALLS 20
216 struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
217 queue_head_t cpu_warm_call_list;
218 decl_simple_lock_data(static, cpu_warm_lock);
219
220 typedef struct cpu_warm_data {
221 timer_call_t cwd_call;
222 uint64_t cwd_deadline;
223 int cwd_result;
224 } *cpu_warm_data_t;
225
226 static void cpu_prewarm_init(void);
227 static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
228 static void _cpu_warm_setup(void *arg);
229 static timer_call_t grab_warm_timer_call(void);
230 static void free_warm_timer_call(timer_call_t call);
231
232 void
233 smp_init(void)
234 {
235 simple_lock_init(&mp_rv_lock, 0);
236 simple_lock_init(&debugger_callback_lock, 0);
237 lck_grp_attr_setdefault(&smp_lck_grp_attr);
238 lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
239 lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
240 lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
241 console_init();
242
243 if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
244 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
245 return;
246
247 cpu_thread_init();
248
249 GPROF_INIT();
250 DBGLOG_CPU_INIT(master_cpu);
251
252 mp_cpus_call_init();
253 mp_cpus_call_cpu_init(master_cpu);
254
255 #if DEBUG || DEVELOPMENT
256 if (PE_parse_boot_argn("interrupt_watchdog",
257 &mp_interrupt_watchdog_enabled,
258 sizeof(mp_interrupt_watchdog_enabled))) {
259 kprintf("Interrupt watchdog %sabled\n",
260 mp_interrupt_watchdog_enabled ? "en" : "dis");
261 }
262 #endif
263
264 if (PE_parse_boot_argn("TSC_sync_margin",
265 &TSC_sync_margin, sizeof(TSC_sync_margin))) {
266 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
267 } else if (cpuid_vmm_present()) {
268 kprintf("TSC sync margin disabled\n");
269 TSC_sync_margin = 0;
270 }
271 smp_initialized = TRUE;
272
273 cpu_prewarm_init();
274
275 return;
276 }
277
278 typedef struct {
279 int target_cpu;
280 int target_lapic;
281 int starter_cpu;
282 } processor_start_info_t;
283 static processor_start_info_t start_info __attribute__((aligned(64)));
284
285 /*
286 * Cache-alignment is to avoid cross-cpu false-sharing interference.
287 */
288 static volatile long tsc_entry_barrier __attribute__((aligned(64)));
289 static volatile long tsc_exit_barrier __attribute__((aligned(64)));
290 static volatile uint64_t tsc_target __attribute__((aligned(64)));
291
292 /*
293 * Poll a CPU to see when it has marked itself as running.
294 */
295 static void
296 mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
297 {
298 while (iters-- > 0) {
299 if (cpu_datap(slot_num)->cpu_running)
300 break;
301 delay(usecdelay);
302 }
303 }
304
305 /*
306 * Quickly bring a halted CPU back online.
307 */
308 kern_return_t
309 intel_startCPU_fast(int slot_num)
310 {
311 kern_return_t rc;
312
313 /*
314 * Try to perform a fast restart
315 */
316 rc = pmCPUExitHalt(slot_num);
317 if (rc != KERN_SUCCESS)
318 /*
319 * The CPU was not eligible for a fast restart.
320 */
321 return(rc);
322
323 KERNEL_DEBUG_CONSTANT(
324 TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
325 slot_num, 0, 0, 0, 0);
326
327 /*
328 * Wait until the CPU is back online.
329 */
330 mp_disable_preemption();
331
332 /*
333 * We use short pauses (1us) for low latency. 30,000 iterations is
334 * longer than a full restart would require, so it should be more
335 * than long enough.
336 */
337
338 mp_wait_for_cpu_up(slot_num, 30000, 1);
339 mp_enable_preemption();
340
341 KERNEL_DEBUG_CONSTANT(
342 TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
343 slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
344
345 /*
346 * Check to make sure that the CPU is really running. If not,
347 * go through the slow path.
348 */
349 if (cpu_datap(slot_num)->cpu_running)
350 return(KERN_SUCCESS);
351 else
352 return(KERN_FAILURE);
353 }
354
355 static void
356 started_cpu(void)
357 {
358 /* Here on the started cpu with cpu_running set TRUE */
359
360 if (TSC_sync_margin &&
361 start_info.target_cpu == cpu_number()) {
362 /*
363 * I've just started up; synchronize again with the starter cpu
364 * and then snap my TSC.
365 */
366 tsc_target = 0;
367 atomic_decl(&tsc_entry_barrier, 1);
368 while (tsc_entry_barrier != 0)
369 ; /* spin for starter and target at barrier */
370 tsc_target = rdtsc64();
371 atomic_decl(&tsc_exit_barrier, 1);
372 }
373 }
374
375 static void
376 start_cpu(void *arg)
377 {
378 int i = 1000;
379 processor_start_info_t *psip = (processor_start_info_t *) arg;
380
381 /* Ignore this if the current processor is not the starter */
382 if (cpu_number() != psip->starter_cpu)
383 return;
384
385 DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
386 arg, psip->target_cpu, psip->target_lapic);
387
388 KERNEL_DEBUG_CONSTANT(
389 TRACE_MP_CPU_START | DBG_FUNC_START,
390 psip->target_cpu,
391 psip->target_lapic, 0, 0, 0);
392
393 i386_start_cpu(psip->target_lapic, psip->target_cpu);
394
395 #ifdef POSTCODE_DELAY
396 /* Wait much longer if postcodes are displayed for a delay period. */
397 i *= 10000;
398 #endif
399 DBG("start_cpu(%p) about to wait for cpu %d\n",
400 arg, psip->target_cpu);
401
402 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
403
404 KERNEL_DEBUG_CONSTANT(
405 TRACE_MP_CPU_START | DBG_FUNC_END,
406 psip->target_cpu,
407 cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
408
409 if (TSC_sync_margin &&
410 cpu_datap(psip->target_cpu)->cpu_running) {
411 /*
412 * Compare the TSC from the started processor with ours.
413 * Report and log/panic if it diverges by more than
414 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
415 * can be overridden by boot-arg (with 0 meaning no checking).
416 */
417 uint64_t tsc_starter;
418 int64_t tsc_delta;
419 atomic_decl(&tsc_entry_barrier, 1);
420 while (tsc_entry_barrier != 0)
421 ; /* spin for both processors at barrier */
422 tsc_starter = rdtsc64();
423 atomic_decl(&tsc_exit_barrier, 1);
424 while (tsc_exit_barrier != 0)
425 ; /* spin for target to store its TSC */
426 tsc_delta = tsc_target - tsc_starter;
427 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
428 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
429 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
430 #if DEBUG
431 panic(
432 #else
433 printf(
434 #endif
435 "Unsynchronized TSC for cpu %d: "
436 "0x%016llx, delta 0x%llx\n",
437 psip->target_cpu, tsc_target, tsc_delta);
438 }
439 }
440 }
441
442 kern_return_t
443 intel_startCPU(
444 int slot_num)
445 {
446 int lapic = cpu_to_lapic[slot_num];
447 boolean_t istate;
448
449 assert(lapic != -1);
450
451 DBGLOG_CPU_INIT(slot_num);
452
453 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
454 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
455
456 /*
457 * Initialize (or re-initialize) the descriptor tables for this cpu.
458 * Propagate processor mode to slave.
459 */
460 cpu_desc_init(cpu_datap(slot_num));
461
462 /* Serialize use of the slave boot stack, etc. */
463 lck_mtx_lock(&mp_cpu_boot_lock);
464
465 istate = ml_set_interrupts_enabled(FALSE);
466 if (slot_num == get_cpu_number()) {
467 ml_set_interrupts_enabled(istate);
468 lck_mtx_unlock(&mp_cpu_boot_lock);
469 return KERN_SUCCESS;
470 }
471
472 start_info.starter_cpu = cpu_number();
473 start_info.target_cpu = slot_num;
474 start_info.target_lapic = lapic;
475 tsc_entry_barrier = 2;
476 tsc_exit_barrier = 2;
477
478 /*
479 * Perform the processor startup sequence with all running
480 * processors rendezvous'ed. This is required during periods when
481 * the cache-disable bit is set for MTRR/PAT initialization.
482 */
483 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
484
485 start_info.target_cpu = 0;
486
487 ml_set_interrupts_enabled(istate);
488 lck_mtx_unlock(&mp_cpu_boot_lock);
489
490 if (!cpu_datap(slot_num)->cpu_running) {
491 kprintf("Failed to start CPU %02d\n", slot_num);
492 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
493 delay(1000000);
494 halt_cpu();
495 return KERN_SUCCESS;
496 } else {
497 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
498 return KERN_SUCCESS;
499 }
500 }
501
502 #if MP_DEBUG
503 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
504 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
505
506 MP_EVENT_NAME_DECL();
507
508 #endif /* MP_DEBUG */
509
510 /*
511 * Note: called with NULL state when polling for TLB flush and cross-calls.
512 */
513 int
514 cpu_signal_handler(x86_saved_state_t *regs)
515 {
516 #if !MACH_KDP
517 #pragma unused (regs)
518 #endif /* !MACH_KDP */
519 int my_cpu;
520 volatile int *my_word;
521
522 SCHED_STATS_IPI(current_processor());
523
524 my_cpu = cpu_number();
525 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
526 /* Store the initial set of signals for diagnostics. New
527 * signals could arrive while these are being processed
528 * so it's no more than a hint.
529 */
530
531 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
532
533 do {
534 #if MACH_KDP
535 if (i_bit(MP_KDP, my_word)) {
536 DBGLOG(cpu_handle,my_cpu,MP_KDP);
537 i_bit_clear(MP_KDP, my_word);
538 /* Ensure that the i386_kernel_state at the base of the
539 * current thread's stack (if any) is synchronized with the
540 * context at the moment of the interrupt, to facilitate
541 * access through the debugger.
542 */
543 sync_iss_to_iks(regs);
544 if (pmsafe_debug && !kdp_snapshot)
545 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
546 mp_kdp_wait(TRUE, FALSE);
547 if (pmsafe_debug && !kdp_snapshot)
548 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
549 } else
550 #endif /* MACH_KDP */
551 if (i_bit(MP_TLB_FLUSH, my_word)) {
552 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
553 i_bit_clear(MP_TLB_FLUSH, my_word);
554 pmap_update_interrupt();
555 } else if (i_bit(MP_CHUD, my_word)) {
556 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
557 i_bit_clear(MP_CHUD, my_word);
558 chudxnu_cpu_signal_handler();
559 } else if (i_bit(MP_CALL, my_word)) {
560 DBGLOG(cpu_handle,my_cpu,MP_CALL);
561 i_bit_clear(MP_CALL, my_word);
562 mp_cpus_call_action();
563 } else if (i_bit(MP_CALL_PM, my_word)) {
564 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
565 i_bit_clear(MP_CALL_PM, my_word);
566 mp_call_PM();
567 }
568 if (regs == NULL) {
569 /* Called to poll only for cross-calls and TLB flush */
570 break;
571 } else if (i_bit(MP_AST, my_word)) {
572 DBGLOG(cpu_handle,my_cpu,MP_AST);
573 i_bit_clear(MP_AST, my_word);
574 ast_check(cpu_to_processor(my_cpu));
575 }
576 } while (*my_word);
577
578 return 0;
579 }
580
581 extern void kprintf_break_lock(void);
582 static int
583 NMIInterruptHandler(x86_saved_state_t *regs)
584 {
585 void *stackptr;
586 char pstr[192];
587 uint64_t now = mach_absolute_time();
588
589 if (panic_active() && !panicDebugging) {
590 if (pmsafe_debug)
591 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
592 for(;;)
593 cpu_pause();
594 }
595
596 atomic_incl(&NMIPI_acks, 1);
597 atomic_incl(&NMI_count, 1);
598 sync_iss_to_iks_unconditionally(regs);
599 __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
600
601 if (cpu_number() == debugger_cpu)
602 goto NMExit;
603
604 if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
605 snprintf(&pstr[0], sizeof(pstr),
606 "Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
607 cpu_number(), now, spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
608 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
609 } else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
610 snprintf(&pstr[0], sizeof(pstr),
611 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
612 cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
613 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
614 } else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
615 snprintf(&pstr[0], sizeof(pstr),
616 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
617 cpu_number(), now);
618 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
619 } else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
620 snprintf(&pstr[0], sizeof(pstr),
621 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
622 cpu_number(), now, vector_timed_out);
623 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
624 }
625
626 #if MACH_KDP
627 if (pmsafe_debug && !kdp_snapshot)
628 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
629 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
630 i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
631 if (panic_active() || NMI_panic_reason != NONE) {
632 mp_kdp_wait(FALSE, TRUE);
633 } else if (!mp_kdp_trap &&
634 !mp_kdp_is_NMI &&
635 virtualized && (debug_boot_arg & DB_NMI)) {
636 /*
637 * Under a VMM with the debug boot-arg set, drop into kdp.
638 * Since an NMI is involved, there's a risk of contending with
639 * a panic. And side-effects of NMIs may result in entry into,
640 * and continuing from, the debugger being unreliable.
641 */
642 if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
643 kprintf_break_lock();
644 kprintf("Debugger entry requested by NMI\n");
645 kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
646 printf("Debugger entry requested by NMI\n");
647 mp_kdp_is_NMI = FALSE;
648 } else {
649 mp_kdp_wait(FALSE, FALSE);
650 }
651 } else {
652 mp_kdp_wait(FALSE, FALSE);
653 }
654 if (pmsafe_debug && !kdp_snapshot)
655 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
656 #endif
657 NMExit:
658 return 1;
659 }
660
661
662 /*
663 * cpu_interrupt is really just to be used by the scheduler to
664 * get a CPU's attention; it may not always issue an IPI. If an
665 * IPI is always needed, then use i386_cpu_IPI.
666 */
667 void
668 cpu_interrupt(int cpu)
669 {
670 boolean_t did_IPI = FALSE;
671
672 if (smp_initialized
673 && pmCPUExitIdle(cpu_datap(cpu))) {
674 i386_cpu_IPI(cpu);
675 did_IPI = TRUE;
676 }
677
678 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
679 }
680
681 /*
682 * Send a true NMI via the local APIC to the specified CPU.
683 */
684 void
685 cpu_NMI_interrupt(int cpu)
686 {
687 if (smp_initialized) {
688 i386_send_NMI(cpu);
689 }
690 }
691
692 void
693 NMI_cpus(void)
694 {
695 unsigned int cpu;
696 boolean_t intrs_enabled;
697 uint64_t tsc_timeout;
698
699 intrs_enabled = ml_set_interrupts_enabled(FALSE);
700
701 for (cpu = 0; cpu < real_ncpus; cpu++) {
702 if (!cpu_datap(cpu)->cpu_running)
703 continue;
704 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
705 cpu_NMI_interrupt(cpu);
706 tsc_timeout = !machine_timeout_suspended() ?
707 rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
708 ~0ULL;
709 while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
710 handle_pending_TLB_flushes();
711 cpu_pause();
712 if (rdtsc64() > tsc_timeout)
713 panic("NMI_cpus() timeout cpu %d", cpu);
714 }
715 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
716 }
717
718 ml_set_interrupts_enabled(intrs_enabled);
719 }
720
721 static void (* volatile mp_PM_func)(void) = NULL;
722
723 static void
724 mp_call_PM(void)
725 {
726 assert(!ml_get_interrupts_enabled());
727
728 if (mp_PM_func != NULL)
729 mp_PM_func();
730 }
731
732 void
733 cpu_PM_interrupt(int cpu)
734 {
735 assert(!ml_get_interrupts_enabled());
736
737 if (mp_PM_func != NULL) {
738 if (cpu == cpu_number())
739 mp_PM_func();
740 else
741 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
742 }
743 }
744
745 void
746 PM_interrupt_register(void (*fn)(void))
747 {
748 mp_PM_func = fn;
749 }
750
751 void
752 i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
753 {
754 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
755 uint64_t tsc_timeout;
756
757
758 if (!cpu_datap(cpu)->cpu_running)
759 return;
760
761 if (event == MP_TLB_FLUSH)
762 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
763
764 DBGLOG(cpu_signal, cpu, event);
765
766 i_bit_set(event, signals);
767 i386_cpu_IPI(cpu);
768 if (mode == SYNC) {
769 again:
770 tsc_timeout = !machine_timeout_suspended() ?
771 rdtsc64() + (1000*1000*1000) :
772 ~0ULL;
773 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
774 cpu_pause();
775 }
776 if (i_bit(event, signals)) {
777 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
778 cpu, event);
779 goto again;
780 }
781 }
782 if (event == MP_TLB_FLUSH)
783 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
784 }
785
786 /*
787 * Helper function called when busy-waiting: returns TRUE if too long
788 * a TSC-based time has elapsed since the start of the spin, so the caller can panic.
789 */
790 static boolean_t
791 mp_spin_timeout(uint64_t tsc_start)
792 {
793 uint64_t tsc_timeout;
794
795 cpu_pause();
796 if (machine_timeout_suspended())
797 return FALSE;
798
799 /*
800 * The timeout is 4 * the spinlock timeout period
801 * unless we have serial console printing (kprintf) enabled
802 * in which case we allow an even greater margin.
803 */
804 tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
805 : LockTimeOutTSC << 4;
806 return (rdtsc64() > tsc_start + tsc_timeout);
807 }
808
809 /*
810 * Helper function to take a spinlock while ensuring that incoming IPIs
811 * are still serviced if interrupts are masked while we spin.
812 * Returns current interrupt state.
813 */
814 boolean_t
815 mp_safe_spin_lock(usimple_lock_t lock)
816 {
817 if (ml_get_interrupts_enabled()) {
818 simple_lock(lock);
819 return TRUE;
820 } else {
821 uint64_t tsc_spin_start = rdtsc64();
822 while (!simple_lock_try(lock)) {
823 cpu_signal_handler(NULL);
824 if (mp_spin_timeout(tsc_spin_start)) {
825 uint32_t lock_cpu;
826 uintptr_t lowner = (uintptr_t)
827 lock->interlock.lock_data;
828 spinlock_timed_out = lock;
829 lock_cpu = spinlock_timeout_NMI(lowner);
830 NMIPI_panic(cpu_to_cpumask(lock_cpu), SPINLOCK_TIMEOUT);
831 panic("mp_safe_spin_lock() timed out, lock: %p, owner thread: 0x%lx, current_thread: %p, owner on CPU 0x%x, time: %llu",
832 lock, lowner, current_thread(), lock_cpu, mach_absolute_time());
833 }
834 }
835 return FALSE;
836 }
837 }
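/*
 * Illustrative sketch (not part of the build): a typical caller of
 * mp_safe_spin_lock(). The lock and critical section are hypothetical. The
 * return value only reports whether interrupts were enabled on entry; the
 * routine does not change the interrupt state, so nothing needs restoring
 * beyond the usual simple_unlock().
 *
 *	boolean_t intrs_were_enabled = mp_safe_spin_lock(&my_lock);	// hypothetical lock
 *	... critical section: while we spun, pending IPIs were polled ...
 *	simple_unlock(&my_lock);
 *	(void) intrs_were_enabled;
 */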
838
839 /*
840 * All-CPU rendezvous:
841 * - CPUs are signalled,
842 * - all execute the setup function (if specified),
843 * - rendezvous (i.e. all cpus reach a barrier),
844 * - all execute the action function (if specified),
845 * - rendezvous again,
846 * - execute the teardown function (if specified), and then
847 * - resume.
848 *
849 * Note that the supplied external functions _must_ be reentrant and aware
850 * that they are running in parallel and in an unknown lock context.
851 */
852
853 static void
854 mp_rendezvous_action(__unused void *null)
855 {
856 boolean_t intrs_enabled;
857 uint64_t tsc_spin_start;
858
859 /* setup function */
860 if (mp_rv_setup_func != NULL)
861 mp_rv_setup_func(mp_rv_func_arg);
862
863 intrs_enabled = ml_get_interrupts_enabled();
864
865 /* spin on entry rendezvous */
866 atomic_incl(&mp_rv_entry, 1);
867 tsc_spin_start = rdtsc64();
868
869 while (mp_rv_entry < mp_rv_ncpus) {
870 /* poll for pesky tlb flushes if interrupts disabled */
871 if (!intrs_enabled)
872 handle_pending_TLB_flushes();
873 if (mp_spin_timeout(tsc_spin_start)) {
874 panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
875 }
876 }
877
878 /* action function */
879 if (mp_rv_action_func != NULL)
880 mp_rv_action_func(mp_rv_func_arg);
881
882 /* spin on exit rendezvous */
883 atomic_incl(&mp_rv_exit, 1);
884 tsc_spin_start = rdtsc64();
885 while (mp_rv_exit < mp_rv_ncpus) {
886 if (!intrs_enabled)
887 handle_pending_TLB_flushes();
888 if (mp_spin_timeout(tsc_spin_start))
889 panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
890 }
891
892 /* teardown function */
893 if (mp_rv_teardown_func != NULL)
894 mp_rv_teardown_func(mp_rv_func_arg);
895
896 /* Bump completion count */
897 atomic_incl(&mp_rv_complete, 1);
898 }
899
900 void
901 mp_rendezvous(void (*setup_func)(void *),
902 void (*action_func)(void *),
903 void (*teardown_func)(void *),
904 void *arg)
905 {
906 uint64_t tsc_spin_start;
907
908 if (!smp_initialized) {
909 if (setup_func != NULL)
910 setup_func(arg);
911 if (action_func != NULL)
912 action_func(arg);
913 if (teardown_func != NULL)
914 teardown_func(arg);
915 return;
916 }
917
918 /* obtain rendezvous lock */
919 (void) mp_safe_spin_lock(&mp_rv_lock);
920
921 /* set static function pointers */
922 mp_rv_setup_func = setup_func;
923 mp_rv_action_func = action_func;
924 mp_rv_teardown_func = teardown_func;
925 mp_rv_func_arg = arg;
926
927 mp_rv_entry = 0;
928 mp_rv_exit = 0;
929 mp_rv_complete = 0;
930
931 /*
932 * signal other processors, which will call mp_rendezvous_action()
933 * with interrupts disabled
934 */
935 mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;
936
937 /* call executor function on this cpu */
938 mp_rendezvous_action(NULL);
939
940 /*
941 * Spin for everyone to complete.
942 * This is necessary to ensure that all processors have proceeded
943 * from the exit barrier before we release the rendezvous structure.
944 */
945 tsc_spin_start = rdtsc64();
946 while (mp_rv_complete < mp_rv_ncpus) {
947 if (mp_spin_timeout(tsc_spin_start))
948 panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
949 }
950
951 /* Tidy up */
952 mp_rv_setup_func = NULL;
953 mp_rv_action_func = NULL;
954 mp_rv_teardown_func = NULL;
955 mp_rv_func_arg = NULL;
956
957 /* release lock */
958 simple_unlock(&mp_rv_lock);
959 }
960
961 void
962 mp_rendezvous_break_lock(void)
963 {
964 simple_lock_init(&mp_rv_lock, 0);
965 }
966
967 static void
968 setup_disable_intrs(__unused void * param_not_used)
969 {
970 /* disable interrupts before the first barrier */
971 boolean_t intr = ml_set_interrupts_enabled(FALSE);
972
973 current_cpu_datap()->cpu_iflag = intr;
974 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
975 }
976
977 static void
978 teardown_restore_intrs(__unused void * param_not_used)
979 {
980 /* restore interrupt flag following MTRR changes */
981 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
982 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
983 }
984
985 /*
986 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
987 * This is exported for use by kexts.
988 */
989 void
990 mp_rendezvous_no_intrs(
991 void (*action_func)(void *),
992 void *arg)
993 {
994 mp_rendezvous(setup_disable_intrs,
995 action_func,
996 teardown_restore_intrs,
997 arg);
998 }
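/*
 * Illustrative sketch (not part of the build): how a kext-style client might
 * use mp_rendezvous_no_intrs() to perform a write on every cpu at once. The
 * action routine and MY_MSR value are hypothetical; the routine must be
 * reentrant because all online cpus execute it in parallel, with interrupts
 * disabled, between the entry and exit barriers described above.
 *
 *	static void
 *	my_percpu_action(void *arg)		// hypothetical
 *	{
 *		wrmsr64(MY_MSR, *(uint64_t *) arg);	// hypothetical per-cpu MSR write
 *	}
 *
 *	uint64_t value = ...;
 *	mp_rendezvous_no_intrs(my_percpu_action, &value);
 */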
999
1000
1001 typedef struct {
1002 queue_chain_t link; /* queue linkage */
1003 void (*func)(void *,void *); /* routine to call */
1004 void *arg0; /* routine's 1st arg */
1005 void *arg1; /* routine's 2nd arg */
1006 cpumask_t *maskp; /* completion response mask */
1007 } mp_call_t;
1008
1009
1010 typedef struct {
1011 queue_head_t queue;
1012 decl_simple_lock_data(, lock);
1013 } mp_call_queue_t;
1014 #define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
1015 static mp_call_queue_t mp_cpus_call_freelist;
1016 static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
1017
1018 static inline boolean_t
1019 mp_call_head_lock(mp_call_queue_t *cqp)
1020 {
1021 boolean_t intrs_enabled;
1022
1023 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1024 simple_lock(&cqp->lock);
1025
1026 return intrs_enabled;
1027 }
1028
1029 /*
1030 * Deliver an NMIPI to a set of processors to cause them to panic.
1031 */
1032 void
1033 NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why) {
1034 unsigned int cpu, cpu_bit;
1035 uint64_t deadline;
1036
1037 NMIPI_enable(TRUE);
1038 NMI_panic_reason = why;
1039
1040 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1041 if ((cpu_mask & cpu_bit) == 0)
1042 continue;
1043 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
1044 cpu_NMI_interrupt(cpu);
1045 }
1046
1047 /* Wait (only so long) for NMI'ed cpus to respond */
1048 deadline = mach_absolute_time() + LockTimeOut;
1049 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1050 if ((cpu_mask & cpu_bit) == 0)
1051 continue;
1052 while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
1053 mach_absolute_time() < deadline) {
1054 cpu_pause();
1055 }
1056 }
1057 }
1058
1059 #if MACH_ASSERT
1060 static inline boolean_t
1061 mp_call_head_is_locked(mp_call_queue_t *cqp)
1062 {
1063 return !ml_get_interrupts_enabled() &&
1064 hw_lock_held((hw_lock_t)&cqp->lock);
1065 }
1066 #endif
1067
1068 static inline void
1069 mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
1070 {
1071 simple_unlock(&cqp->lock);
1072 ml_set_interrupts_enabled(intrs_enabled);
1073 }
1074
1075 static inline mp_call_t *
1076 mp_call_alloc(void)
1077 {
1078 mp_call_t *callp = NULL;
1079 boolean_t intrs_enabled;
1080 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1081
1082 intrs_enabled = mp_call_head_lock(cqp);
1083 if (!queue_empty(&cqp->queue))
1084 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1085 mp_call_head_unlock(cqp, intrs_enabled);
1086
1087 return callp;
1088 }
1089
1090 static inline void
1091 mp_call_free(mp_call_t *callp)
1092 {
1093 boolean_t intrs_enabled;
1094 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1095
1096 intrs_enabled = mp_call_head_lock(cqp);
1097 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
1098 mp_call_head_unlock(cqp, intrs_enabled);
1099 }
1100
1101 static inline mp_call_t *
1102 mp_call_dequeue_locked(mp_call_queue_t *cqp)
1103 {
1104 mp_call_t *callp = NULL;
1105
1106 assert(mp_call_head_is_locked(cqp));
1107 if (!queue_empty(&cqp->queue))
1108 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1109 return callp;
1110 }
1111
1112 static inline void
1113 mp_call_enqueue_locked(
1114 mp_call_queue_t *cqp,
1115 mp_call_t *callp)
1116 {
1117 queue_enter(&cqp->queue, callp, typeof(callp), link);
1118 }
1119
1120 /* Called on the boot processor to initialize global structures */
1121 static void
1122 mp_cpus_call_init(void)
1123 {
1124 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1125
1126 DBG("mp_cpus_call_init()\n");
1127 simple_lock_init(&cqp->lock, 0);
1128 queue_init(&cqp->queue);
1129 }
1130
1131 /*
1132 * Called at processor registration to add call buffers to the free list
1133 * and to initialize the per-cpu call queue.
1134 */
1135 void
1136 mp_cpus_call_cpu_init(int cpu)
1137 {
1138 int i;
1139 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1140 mp_call_t *callp;
1141
1142 simple_lock_init(&cqp->lock, 0);
1143 queue_init(&cqp->queue);
1144 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1145 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
1146 mp_call_free(callp);
1147 }
1148
1149 DBG("mp_cpus_call_init(%d) done\n", cpu);
1150 }
1151
1152 /*
1153 * This is called from cpu_signal_handler() to process an MP_CALL signal.
1154 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
1155 */
1156 static void
1157 mp_cpus_call_action(void)
1158 {
1159 mp_call_queue_t *cqp;
1160 boolean_t intrs_enabled;
1161 mp_call_t *callp;
1162 mp_call_t call;
1163
1164 assert(!ml_get_interrupts_enabled());
1165 cqp = &mp_cpus_call_head[cpu_number()];
1166 intrs_enabled = mp_call_head_lock(cqp);
1167 while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
1168 /* Copy call request to the stack to free buffer */
1169 call = *callp;
1170 mp_call_free(callp);
1171 if (call.func != NULL) {
1172 mp_call_head_unlock(cqp, intrs_enabled);
1173 KERNEL_DEBUG_CONSTANT(
1174 TRACE_MP_CPUS_CALL_ACTION,
1175 VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
1176 VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
1177 call.func(call.arg0, call.arg1);
1178 (void) mp_call_head_lock(cqp);
1179 }
1180 if (call.maskp != NULL)
1181 i_bit_set(cpu_number(), call.maskp);
1182 }
1183 mp_call_head_unlock(cqp, intrs_enabled);
1184 }
1185
1186 /*
1187 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1188 * Possible modes are:
1189 * SYNC: function is called serially on target cpus in logical cpu order
1190 * waiting for each call to be acknowledged before proceeding
1191 * ASYNC: function call is queued to the specified cpus
1192 * waiting for all calls to complete in parallel before returning
1193 * NOSYNC: function calls are queued
1194 * but we return before confirmation of calls completing.
1195 * The action function may be NULL.
1196 * The cpu mask may include the local cpu. Offline cpus are ignored.
1197 * The return value is the number of cpus on which the call was made or queued.
1198 */
1199 cpu_t
1200 mp_cpus_call(
1201 cpumask_t cpus,
1202 mp_sync_t mode,
1203 void (*action_func)(void *),
1204 void *arg)
1205 {
1206 return mp_cpus_call1(
1207 cpus,
1208 mode,
1209 (void (*)(void *,void *))action_func,
1210 arg,
1211 NULL,
1212 NULL);
1213 }
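/*
 * Illustrative sketch (not part of the build): the three calling modes of
 * mp_cpus_call(). The action routine count_me and the hits counter are
 * hypothetical; remote invocations run in interrupt context on the target
 * cpu, so the action function must be interrupt-safe.
 *
 *	static void
 *	count_me(void *arg)			// hypothetical action function
 *	{
 *		atomic_incl((volatile long *) arg, 1);
 *	}
 *
 *	volatile long	hits = 0;
 *	cpu_t		n;
 *
 *	// SYNC: called serially on each target, each acknowledged in turn.
 *	n = mp_cpus_call(CPUMASK_OTHERS, SYNC, count_me, (void *) &hits);
 *
 *	// ASYNC: queued to all targets; returns after all have completed.
 *	n = mp_cpus_call(CPUMASK_OTHERS, ASYNC, count_me, (void *) &hits);
 *
 *	// NOSYNC: queued; returns without waiting for completion.
 *	n = mp_cpus_call(cpu_to_cpumask(master_cpu), NOSYNC, count_me, (void *) &hits);
 *
 * In each case n is the number of cpus on which the call was made or queued.
 */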
1214
1215 static void
1216 mp_cpus_call_wait(boolean_t intrs_enabled,
1217 cpumask_t cpus_called,
1218 cpumask_t *cpus_responded)
1219 {
1220 mp_call_queue_t *cqp;
1221 uint64_t tsc_spin_start;
1222
1223 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
1224 cqp = &mp_cpus_call_head[cpu_number()];
1225
1226 tsc_spin_start = rdtsc64();
1227 while (*cpus_responded != cpus_called) {
1228 if (!intrs_enabled) {
1229 /* Sniffing w/o locking */
1230 if (!queue_empty(&cqp->queue))
1231 mp_cpus_call_action();
1232 cpu_signal_handler(NULL);
1233 }
1234 if (mp_spin_timeout(tsc_spin_start)) {
1235 cpumask_t cpus_unresponsive;
1236
1237 cpus_unresponsive = cpus_called & ~(*cpus_responded);
1238 NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
1239 panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
1240 cpus_unresponsive);
1241 }
1242 }
1243 }
1244
1245 cpu_t
1246 mp_cpus_call1(
1247 cpumask_t cpus,
1248 mp_sync_t mode,
1249 void (*action_func)(void *, void *),
1250 void *arg0,
1251 void *arg1,
1252 cpumask_t *cpus_calledp)
1253 {
1254 cpu_t cpu = 0;
1255 boolean_t intrs_enabled = FALSE;
1256 boolean_t call_self = FALSE;
1257 cpumask_t cpus_called = 0;
1258 cpumask_t cpus_responded = 0;
1259 long cpus_call_count = 0;
1260 uint64_t tsc_spin_start;
1261 boolean_t topo_lock;
1262
1263 KERNEL_DEBUG_CONSTANT(
1264 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
1265 cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
1266
1267 if (!smp_initialized) {
1268 if ((cpus & CPUMASK_SELF) == 0)
1269 goto out;
1270 if (action_func != NULL) {
1271 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1272 action_func(arg0, arg1);
1273 ml_set_interrupts_enabled(intrs_enabled);
1274 }
1275 call_self = TRUE;
1276 goto out;
1277 }
1278
1279 /*
1280 * Queue the call for each non-local requested cpu.
1281 * This is performed under the topo lock to prevent changes to
1282 * cpus online state and to prevent concurrent rendezvouses --
1283 * although an exception is made if we're calling only the master
1284 * processor since that always remains active. Note: this exception
1285 * is expected for longterm timer nosync cross-calls to the master cpu.
1286 */
1287 mp_disable_preemption();
1288 intrs_enabled = ml_get_interrupts_enabled();
1289 topo_lock = (cpus != cpu_to_cpumask(master_cpu));
1290 if (topo_lock) {
1291 ml_set_interrupts_enabled(FALSE);
1292 (void) mp_safe_spin_lock(&x86_topo_lock);
1293 }
1294 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1295 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1296 !cpu_datap(cpu)->cpu_running)
1297 continue;
1298 tsc_spin_start = rdtsc64();
1299 if (cpu == (cpu_t) cpu_number()) {
1300 /*
1301 * We don't IPI ourself and if calling asynchronously,
1302 * we defer our call until we have signalled all others.
1303 */
1304 call_self = TRUE;
1305 if (mode == SYNC && action_func != NULL) {
1306 KERNEL_DEBUG_CONSTANT(
1307 TRACE_MP_CPUS_CALL_LOCAL,
1308 VM_KERNEL_UNSLIDE(action_func),
1309 VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
1310 action_func(arg0, arg1);
1311 }
1312 } else {
1313 /*
1314 * Here to queue a call to cpu and IPI.
1315 */
1316 mp_call_t *callp = NULL;
1317 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1318 boolean_t intrs_inner;
1319
1320 queue_call:
1321 if (callp == NULL)
1322 callp = mp_call_alloc();
1323 intrs_inner = mp_call_head_lock(cqp);
1324 if (callp == NULL) {
1325 mp_call_head_unlock(cqp, intrs_inner);
1326 KERNEL_DEBUG_CONSTANT(
1327 TRACE_MP_CPUS_CALL_NOBUF,
1328 cpu, 0, 0, 0, 0);
1329 if (!intrs_inner) {
1330 /* Sniffing w/o locking */
1331 if (!queue_empty(&cqp->queue))
1332 mp_cpus_call_action();
1333 handle_pending_TLB_flushes();
1334 }
1335 if (mp_spin_timeout(tsc_spin_start))
1336 panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
1337 tsc_spin_start, rdtsc64());
1338 goto queue_call;
1339 }
1340 callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
1341 callp->func = action_func;
1342 callp->arg0 = arg0;
1343 callp->arg1 = arg1;
1344 mp_call_enqueue_locked(cqp, callp);
1345 cpus_call_count++;
1346 cpus_called |= cpu_to_cpumask(cpu);
1347 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1348 mp_call_head_unlock(cqp, intrs_inner);
1349 if (mode == SYNC) {
1350 mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
1351 }
1352 }
1353 }
1354 if (topo_lock) {
1355 simple_unlock(&x86_topo_lock);
1356 ml_set_interrupts_enabled(intrs_enabled);
1357 }
1358
1359 /* Call locally if mode not SYNC */
1360 if (mode != SYNC && call_self ) {
1361 KERNEL_DEBUG_CONSTANT(
1362 TRACE_MP_CPUS_CALL_LOCAL,
1363 VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
1364 if (action_func != NULL) {
1365 ml_set_interrupts_enabled(FALSE);
1366 action_func(arg0, arg1);
1367 ml_set_interrupts_enabled(intrs_enabled);
1368 }
1369 }
1370
1371 /* For ASYNC, now wait for all signaled cpus to complete their calls */
1372 if (mode == ASYNC)
1373 mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
1374
1375 /* Safe to allow pre-emption now */
1376 mp_enable_preemption();
1377
1378 out:
1379 if (call_self){
1380 cpus_called |= cpu_to_cpumask(cpu);
1381 cpus_call_count++;
1382 }
1383
1384 if (cpus_calledp)
1385 *cpus_calledp = cpus_called;
1386
1387 KERNEL_DEBUG_CONSTANT(
1388 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
1389 cpus_call_count, cpus_called, 0, 0, 0);
1390
1391 return (cpu_t) cpus_call_count;
1392 }
1393
1394
1395 static void
1396 mp_broadcast_action(__unused void *null)
1397 {
1398 /* call action function */
1399 if (mp_bc_action_func != NULL)
1400 mp_bc_action_func(mp_bc_func_arg);
1401
1402 /* if we're the last one through, wake up the instigator */
1403 if (atomic_decl_and_test(&mp_bc_count, 1))
1404 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1405 }
1406
1407 /*
1408 * mp_broadcast() runs a given function on all active cpus.
1409 * The caller blocks until the function has run on all cpus.
1410 * The caller will also block if there is another pending broadcast.
1411 */
1412 void
1413 mp_broadcast(
1414 void (*action_func)(void *),
1415 void *arg)
1416 {
1417 if (!smp_initialized) {
1418 if (action_func != NULL)
1419 action_func(arg);
1420 return;
1421 }
1422
1423 /* obtain broadcast lock */
1424 lck_mtx_lock(&mp_bc_lock);
1425
1426 /* set static function pointers */
1427 mp_bc_action_func = action_func;
1428 mp_bc_func_arg = arg;
1429
1430 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
1431
1432 /*
1433 * signal other processors, which will call mp_broadcast_action()
1434 */
1435 mp_bc_count = real_ncpus; /* assume max possible active */
1436 mp_bc_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, *mp_broadcast_action, NULL) + 1;
1437 atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus); /* subtract inactive */
1438
1439 /* call executor function on this cpu */
1440 mp_broadcast_action(NULL);
1441
1442 /* block for other cpus to have run action_func */
1443 if (mp_bc_ncpus > 1)
1444 thread_block(THREAD_CONTINUE_NULL);
1445 else
1446 clear_wait(current_thread(), THREAD_AWAKENED);
1447
1448 /* release lock */
1449 lck_mtx_unlock(&mp_bc_lock);
1450 }
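/*
 * Illustrative sketch (not part of the build): mp_broadcast() usage. The
 * action routine is hypothetical. Because the instigator may block waiting
 * for the other cpus, this must be used from thread context, unlike
 * mp_cpus_call() which can also be driven with interrupts disabled.
 *
 *	static void
 *	refresh_percpu_state(void *arg)		// hypothetical
 *	{
 *		(void) arg;
 *		// per-cpu work; runs once on every active cpu
 *	}
 *
 *	mp_broadcast(refresh_percpu_state, NULL);	// returns after all cpus have run it
 */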
1451
1452 void
1453 mp_cpus_kick(cpumask_t cpus)
1454 {
1455 cpu_t cpu;
1456 boolean_t intrs_enabled = FALSE;
1457
1458 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1459 mp_safe_spin_lock(&x86_topo_lock);
1460
1461 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1462 if ((cpu == (cpu_t) cpu_number())
1463 || ((cpu_to_cpumask(cpu) & cpus) == 0)
1464 || (!cpu_datap(cpu)->cpu_running))
1465 {
1466 continue;
1467 }
1468
1469 lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
1470 }
1471
1472 simple_unlock(&x86_topo_lock);
1473 ml_set_interrupts_enabled(intrs_enabled);
1474 }
1475
1476 void
1477 i386_activate_cpu(void)
1478 {
1479 cpu_data_t *cdp = current_cpu_datap();
1480
1481 assert(!ml_get_interrupts_enabled());
1482
1483 if (!smp_initialized) {
1484 cdp->cpu_running = TRUE;
1485 return;
1486 }
1487
1488 mp_safe_spin_lock(&x86_topo_lock);
1489 cdp->cpu_running = TRUE;
1490 started_cpu();
1491 simple_unlock(&x86_topo_lock);
1492 flush_tlb_raw();
1493 }
1494
1495 void
1496 i386_deactivate_cpu(void)
1497 {
1498 cpu_data_t *cdp = current_cpu_datap();
1499
1500 assert(!ml_get_interrupts_enabled());
1501
1502 KERNEL_DEBUG_CONSTANT(
1503 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
1504 0, 0, 0, 0, 0);
1505
1506 mp_safe_spin_lock(&x86_topo_lock);
1507 cdp->cpu_running = FALSE;
1508 simple_unlock(&x86_topo_lock);
1509
1510 /*
1511 * Move all of this cpu's timers to the master/boot cpu,
1512 * and poke it in case there's a sooner deadline for it to schedule.
1513 */
1514 timer_queue_shutdown(&cdp->rtclock_timer.queue);
1515 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
1516
1517 #if MONOTONIC
1518 mt_cpu_down(cdp);
1519 #endif /* MONOTONIC */
1520
1521 /*
1522 * Open an interrupt window
1523 * and ensure any pending IPI or timer is serviced
1524 */
1525 mp_disable_preemption();
1526 ml_set_interrupts_enabled(TRUE);
1527
1528 while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
1529 cpu_pause();
1530 /*
1531 * Ensure there's no remaining timer deadline set
1532 * - AICPM may have left one active.
1533 */
1534 setPop(0);
1535
1536 ml_set_interrupts_enabled(FALSE);
1537 mp_enable_preemption();
1538
1539 KERNEL_DEBUG_CONSTANT(
1540 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
1541 0, 0, 0, 0, 0);
1542 }
1543
1544 int pmsafe_debug = 1;
1545
1546 #if MACH_KDP
1547 volatile boolean_t mp_kdp_trap = FALSE;
1548 volatile boolean_t mp_kdp_is_NMI = FALSE;
1549 volatile unsigned long mp_kdp_ncpus;
1550 boolean_t mp_kdp_state;
1551
1552
1553 void
1554 mp_kdp_enter(boolean_t proceed_on_failure)
1555 {
1556 unsigned int cpu;
1557 unsigned int ncpus = 0;
1558 unsigned int my_cpu;
1559 uint64_t tsc_timeout;
1560
1561 DBG("mp_kdp_enter()\n");
1562
1563 /*
1564 * Here to enter the debugger.
1565 * In case of races, only one cpu is allowed to enter kdp after
1566 * stopping others.
1567 */
1568 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1569 my_cpu = cpu_number();
1570
1571 if (my_cpu == (unsigned) debugger_cpu) {
1572 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1573 kdp_reset();
1574 return;
1575 }
1576
1577 uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1578 int locked = 0;
1579 while (!locked || mp_kdp_trap) {
1580 if (locked) {
1581 simple_unlock(&x86_topo_lock);
1582 }
1583 if (proceed_on_failure) {
1584 if (mach_absolute_time() - start_time > 500000000ll) {
1585 kprintf("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
1586 break;
1587 }
1588 locked = simple_lock_try(&x86_topo_lock);
1589 if (!locked) {
1590 cpu_pause();
1591 }
1592 } else {
1593 mp_safe_spin_lock(&x86_topo_lock);
1594 locked = TRUE;
1595 }
1596
1597 if (locked && mp_kdp_trap) {
1598 simple_unlock(&x86_topo_lock);
1599 DBG("mp_kdp_enter() race lost\n");
1600 #if MACH_KDP
1601 mp_kdp_wait(TRUE, FALSE);
1602 #endif
1603 locked = FALSE;
1604 }
1605 }
1606
1607 if (pmsafe_debug && !kdp_snapshot)
1608 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1609
1610 debugger_cpu = my_cpu;
1611 ncpus = 1;
1612 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1613 mp_kdp_trap = TRUE;
1614 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1615
1616 /*
1617 * Deliver a nudge to other cpus, counting how many
1618 */
1619 DBG("mp_kdp_enter() signaling other processors\n");
1620 if (force_immediate_debugger_NMI == FALSE) {
1621 for (cpu = 0; cpu < real_ncpus; cpu++) {
1622 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1623 continue;
1624 ncpus++;
1625 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1626 }
1627 /*
1628 * Wait for other processors to synchronize
1629 */
1630 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1631
1632 /*
1633 * This timeout is rather arbitrary; we don't want to NMI
1634 * processors that are executing at potentially
1635 * "unsafe-to-interrupt" points such as the trampolines,
1636 * but neither do we want to lose state by waiting too long.
1637 */
1638 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1639
1640 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1641 /*
1642 * A TLB shootdown request may be pending--this would
1643 * result in the requesting processor waiting in
1644 * PMAP_UPDATE_TLBS() until this processor deals with it.
1645 * Process it, so it can now enter mp_kdp_wait()
1646 */
1647 handle_pending_TLB_flushes();
1648 cpu_pause();
1649 }
1650 /* If we've timed out, and some processor(s) are still unresponsive,
1651 * interrupt them with an NMI via the local APIC, iff a panic is
1652 * in progress.
1653 */
1654 if (panic_active()) {
1655 NMIPI_enable(TRUE);
1656 }
1657 if (mp_kdp_ncpus != ncpus) {
1658 DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
1659 for (cpu = 0; cpu < real_ncpus; cpu++) {
1660 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1661 continue;
1662 if (cpu_signal_pending(cpu, MP_KDP))
1663 cpu_NMI_interrupt(cpu);
1664 }
1665 /* Wait again for the same timeout */
1666 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1667 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1668 handle_pending_TLB_flushes();
1669 cpu_pause();
1670 }
1671 if (mp_kdp_ncpus != ncpus) {
1672 panic("mp_kdp_enter() timed-out waiting after NMI");
1673 }
1674 }
1675 }
1676 else
1677 for (cpu = 0; cpu < real_ncpus; cpu++) {
1678 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1679 continue;
1680 cpu_NMI_interrupt(cpu);
1681 }
1682
1683 if (locked) {
1684 simple_unlock(&x86_topo_lock);
1685 }
1686
1687 DBG("mp_kdp_enter() %d processors done %s\n",
1688 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1689
1690 postcode(MP_KDP_ENTER);
1691 }
1692
1693 static boolean_t
1694 cpu_signal_pending(int cpu, mp_event_t event)
1695 {
1696 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1697 boolean_t retval = FALSE;
1698
1699 if (i_bit(event, signals))
1700 retval = TRUE;
1701 return retval;
1702 }
1703
1704 long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1705 void *arg0, void *arg1)
1706 {
1707 if (lcpu > (real_ncpus - 1))
1708 return -1;
1709
1710 if (func == NULL)
1711 return -1;
1712
1713 kdp_xcpu_call_func.func = func;
1714 kdp_xcpu_call_func.ret = -1;
1715 kdp_xcpu_call_func.arg0 = arg0;
1716 kdp_xcpu_call_func.arg1 = arg1;
1717 kdp_xcpu_call_func.cpu = lcpu;
1718 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1719 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1720 cpu_pause();
1721 return kdp_xcpu_call_func.ret;
1722 }
1723
1724 static void
1725 kdp_x86_xcpu_poll(void)
1726 {
1727 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1728 kdp_xcpu_call_func.ret =
1729 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1730 kdp_xcpu_call_func.arg1,
1731 cpu_number());
1732 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1733 }
1734 }
1735
1736 static void
1737 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
1738 {
1739 DBG("mp_kdp_wait()\n");
1740
1741 current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
1742 #if CONFIG_MCA
1743 /* If we've trapped due to a machine-check, save MCA registers */
1744 mca_check_save();
1745 #endif
1746
1747 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1748 while (mp_kdp_trap || (isNMI == TRUE)) {
1749 /*
1750 * A TLB shootdown request may be pending--this would result
1751 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1752 * until this processor handles it.
1753 * Process it, so it can now enter mp_kdp_wait()
1754 */
1755 if (flush)
1756 handle_pending_TLB_flushes();
1757
1758 kdp_x86_xcpu_poll();
1759 cpu_pause();
1760 }
1761
1762 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1763 DBG("mp_kdp_wait() done\n");
1764 }
1765
1766 void
1767 mp_kdp_exit(void)
1768 {
1769 DBG("mp_kdp_exit()\n");
1770 debugger_cpu = -1;
1771 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1772
1773 debugger_exit_time = mach_absolute_time();
1774
1775 mp_kdp_trap = FALSE;
1776 mfence();
1777
1778 /* Wait for other processors to stop spinning. XXX needs timeout */
1779 DBG("mp_kdp_exit() waiting for processors to resume\n");
1780 while (mp_kdp_ncpus > 0) {
1781 /*
1782 * a TLB shootdown request may be pending... this would result in the requesting
1783 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1784 * Process it, so it can now enter mp_kdp_wait()
1785 */
1786 handle_pending_TLB_flushes();
1787
1788 cpu_pause();
1789 }
1790
1791 if (pmsafe_debug && !kdp_snapshot)
1792 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1793
1794 debugger_exit_time = mach_absolute_time();
1795
1796 DBG("mp_kdp_exit() done\n");
1797 (void) ml_set_interrupts_enabled(mp_kdp_state);
1798 postcode(MP_KDP_EXIT);
1799 }
1800
1801 #endif /* MACH_KDP */
1802
1803 boolean_t
1804 mp_recent_debugger_activity(void) {
1805 uint64_t abstime = mach_absolute_time();
1806 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1807 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
1808 }
1809
1810 /*ARGSUSED*/
1811 void
1812 init_ast_check(
1813 __unused processor_t processor)
1814 {
1815 }
1816
1817 void
1818 cause_ast_check(
1819 processor_t processor)
1820 {
1821 int cpu = processor->cpu_id;
1822
1823 if (cpu != cpu_number()) {
1824 i386_signal_cpu(cpu, MP_AST, ASYNC);
1825 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1826 }
1827 }
1828
1829 void
1830 slave_machine_init(void *param)
1831 {
1832 /*
1833 * Here in process context, but with interrupts disabled.
1834 */
1835 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1836
1837 if (param == FULL_SLAVE_INIT) {
1838 /*
1839 * Cold start
1840 */
1841 clock_init();
1842 }
1843 cpu_machine_init(); /* Interrupts enabled hereafter */
1844 }
1845
1846 #undef cpu_number
1847 int cpu_number(void)
1848 {
1849 return get_cpu_number();
1850 }
1851
1852 static void
1853 cpu_prewarm_init()
1854 {
1855 int i;
1856
1857 simple_lock_init(&cpu_warm_lock, 0);
1858 queue_init(&cpu_warm_call_list);
1859 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1860 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1861 }
1862 }
1863
1864 static timer_call_t
1865 grab_warm_timer_call()
1866 {
1867 spl_t x;
1868 timer_call_t call = NULL;
1869
1870 x = splsched();
1871 simple_lock(&cpu_warm_lock);
1872 if (!queue_empty(&cpu_warm_call_list)) {
1873 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1874 }
1875 simple_unlock(&cpu_warm_lock);
1876 splx(x);
1877
1878 return call;
1879 }
1880
1881 static void
1882 free_warm_timer_call(timer_call_t call)
1883 {
1884 spl_t x;
1885
1886 x = splsched();
1887 simple_lock(&cpu_warm_lock);
1888 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1889 simple_unlock(&cpu_warm_lock);
1890 splx(x);
1891 }
1892
1893 /*
1894 * Runs in timer call context (interrupts disabled).
1895 */
1896 static void
1897 cpu_warm_timer_call_func(
1898 call_entry_param_t p0,
1899 __unused call_entry_param_t p1)
1900 {
1901 free_warm_timer_call((timer_call_t)p0);
1902 return;
1903 }
1904
1905 /*
1906 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1907 */
1908 static void
1909 _cpu_warm_setup(
1910 void *arg)
1911 {
1912 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1913
1914 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
1915 cwdp->cwd_result = 0;
1916
1917 return;
1918 }
1919
1920 /*
1921 * Not safe to call with interrupts disabled.
1922 */
1923 kern_return_t
1924 ml_interrupt_prewarm(
1925 uint64_t deadline)
1926 {
1927 struct cpu_warm_data cwd;
1928 timer_call_t call;
1929 cpu_t ct;
1930
1931 if (ml_get_interrupts_enabled() == FALSE) {
1932 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1933 }
1934
1935 /*
1936 * If the platform doesn't need our help, say that we succeeded.
1937 */
1938 if (!ml_get_interrupt_prewake_applicable()) {
1939 return KERN_SUCCESS;
1940 }
1941
1942 /*
1943 * Grab a timer call to use.
1944 */
1945 call = grab_warm_timer_call();
1946 if (call == NULL) {
1947 return KERN_RESOURCE_SHORTAGE;
1948 }
1949
1950 timer_call_setup(call, cpu_warm_timer_call_func, call);
1951 cwd.cwd_call = call;
1952 cwd.cwd_deadline = deadline;
1953 cwd.cwd_result = 0;
1954
1955 /*
1956 * For now, non-local interrupts happen on the master processor.
1957 */
1958 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1959 if (ct == 0) {
1960 free_warm_timer_call(call);
1961 return KERN_FAILURE;
1962 } else {
1963 return cwd.cwd_result;
1964 }
1965 }
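/*
 * Illustrative sketch (not part of the build): a power-management client
 * might arm a prewarm timer for the next known deadline before requesting a
 * deep idle state, so the master cpu is awake in time to take the interrupt.
 * The expected_idle_abs value is hypothetical, and the call must be made
 * with interrupts enabled (see the panic above).
 *
 *	uint64_t deadline = mach_absolute_time() + expected_idle_abs;
 *	if (ml_interrupt_prewarm(deadline) != KERN_SUCCESS) {
 *		// no prewarm timer was armed; proceed without it
 *	}
 */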
1966
1967 #if DEBUG || DEVELOPMENT
1968 void
1969 kernel_spin(uint64_t spin_ns)
1970 {
1971 boolean_t istate;
1972 uint64_t spin_abs;
1973 uint64_t deadline;
1974 cpu_data_t *cdp;
1975
1976 kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
1977 istate = ml_set_interrupts_enabled(FALSE);
1978 cdp = current_cpu_datap();
1979 nanoseconds_to_absolutetime(spin_ns, &spin_abs);
1980
1981 /* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
1982 cdp->cpu_int_event_time = mach_absolute_time();
1983 cdp->cpu_int_state = (void *) USER_STATE(current_thread());
1984
1985 deadline = mach_absolute_time() + spin_ns;
1986 while (mach_absolute_time() < deadline)
1987 cpu_pause();
1988
1989 cdp->cpu_int_event_time = 0;
1990 cdp->cpu_int_state = NULL;
1991
1992 ml_set_interrupts_enabled(istate);
1993 kprintf("kernel_spin() continuing\n");
1994 }
1995
1996 /*
1997 * Called from the scheduler's maintenance thread to
1998 * scan running processors for long-running ISRs and:
1999 * - panic if longer than LockTimeOut, or
2000 * - log if more than a quantum.
2001 */
2002 void
2003 mp_interrupt_watchdog(void)
2004 {
2005 cpu_t cpu;
2006 boolean_t intrs_enabled = FALSE;
2007 uint16_t cpu_int_num;
2008 uint64_t cpu_int_event_time;
2009 uint64_t cpu_rip;
2010 uint64_t cpu_int_duration;
2011 uint64_t now;
2012 x86_saved_state_t *cpu_int_state;
2013
2014 if (__improbable(!mp_interrupt_watchdog_enabled))
2015 return;
2016
2017 intrs_enabled = ml_set_interrupts_enabled(FALSE);
2018 now = mach_absolute_time();
2019 /*
2020 * While timeouts are not suspended,
2021 * check all other processors for long outstanding interrupt handling.
2022 */
2023 for (cpu = 0;
2024 cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
2025 cpu++) {
2026 if ((cpu == (cpu_t) cpu_number()) ||
2027 (!cpu_datap(cpu)->cpu_running))
2028 continue;
2029 cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
2030 if (cpu_int_event_time == 0)
2031 continue;
2032 if (__improbable(now < cpu_int_event_time))
2033 continue; /* skip due to inter-processor skew */
2034 cpu_int_state = cpu_datap(cpu)->cpu_int_state;
2035 if (__improbable(cpu_int_state == NULL))
2036 /* The interrupt may have been dismissed */
2037 continue;
2038
2039 /* Here with a cpu handling an interrupt */
2040
2041 cpu_int_duration = now - cpu_int_event_time;
2042 if (__improbable(cpu_int_duration > LockTimeOut)) {
2043 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2044 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2045 vector_timed_out = cpu_int_num;
2046 NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
2047 panic("Interrupt watchdog, "
2048 "cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
2049 cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
2050 /* NOT REACHED */
2051 } else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
2052 mp_interrupt_watchdog_events++;
2053 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2054 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2055 ml_set_interrupts_enabled(intrs_enabled);
2056 printf("Interrupt watchdog, "
2057 "cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
2058 cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
2059 return;
2060 }
2061 }
2062
2063 ml_set_interrupts_enabled(intrs_enabled);
2064 }
2065 #endif