git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/mp.c (xnu-4570.1.46)
55e303ae 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
55e303ae 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
55e303ae 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
55e303ae
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31
55e303ae 32#include <mach_kdp.h>
39037602 33#include <kdp/kdp_internal.h>
55e303ae 34#include <mach_ldebug.h>
91447636
A
35#include <gprof.h>
36
37#include <mach/mach_types.h>
38#include <mach/kern_return.h>
39
40#include <kern/kern_types.h>
41#include <kern/startup.h>
c910b4d9 42#include <kern/timer_queue.h>
91447636
A
43#include <kern/processor.h>
44#include <kern/cpu_number.h>
45#include <kern/cpu_data.h>
46#include <kern/assert.h>
47#include <kern/machine.h>
0c530ab8 48#include <kern/pms.h>
593a1d5f 49#include <kern/misc_protos.h>
39236c6e 50#include <kern/timer_call.h>
6d2010ae
A
51#include <kern/kalloc.h>
52#include <kern/queue.h>
fe8ab488 53#include <prng/random.h>
91447636
A
54
55#include <vm/vm_map.h>
56#include <vm/vm_kern.h>
57
58#include <profiling/profile-mk.h>
55e303ae 59
fe8ab488 60#include <i386/bit_routines.h>
b0d623f7
A
61#include <i386/proc_reg.h>
62#include <i386/cpu_threads.h>
63#include <i386/mp_desc.h>
64#include <i386/misc_protos.h>
65#include <i386/trap.h>
66#include <i386/postcode.h>
67#include <i386/machine_routines.h>
55e303ae
A
68#include <i386/mp.h>
69#include <i386/mp_events.h>
593a1d5f 70#include <i386/lapic.h>
55e303ae 71#include <i386/cpuid.h>
b0d623f7 72#include <i386/fpu.h>
55e303ae 73#include <i386/machine_cpu.h>
0c530ab8 74#include <i386/pmCPU.h>
b0d623f7 75#if CONFIG_MCA
2d21ac55 76#include <i386/machine_check.h>
b0d623f7
A
77#endif
78#include <i386/acpi.h>
0c530ab8
A
79
80#include <chud/chud_xnu.h>
81#include <chud/chud_xnu_private.h>
82
83#include <sys/kdebug.h>
55e303ae 84
39236c6e
A
85#include <console/serial_protos.h>
86
5ba3f43e
A
87#if MONOTONIC
88#include <kern/monotonic.h>
89#endif /* MONOTONIC */
90
55e303ae
A
91#if MP_DEBUG
92#define PAUSE delay(1000000)
93#define DBG(x...) kprintf(x)
94#else
95#define DBG(x...)
96#define PAUSE
97#endif /* MP_DEBUG */
98
6d2010ae
A
99/* Debugging/test trace events: */
100#define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
101#define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
102#define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
103#define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
104#define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
bd504ef0
A
105#define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
106#define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
107#define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
55e303ae 108
7e4a7d39
A
109#define ABS(v) (((v) > 0)?(v):-(v))
110
55e303ae 111void slave_boot_init(void);
6d2010ae 112void i386_cpu_IPI(int cpu);
55e303ae 113
39236c6e 114#if MACH_KDP
b0d623f7 115static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
39236c6e 116#endif /* MACH_KDP */
55e303ae 117
39236c6e 118#if MACH_KDP
0c530ab8 119static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
39236c6e 120#endif /* MACH_KDP */
593a1d5f 121static int NMIInterruptHandler(x86_saved_state_t *regs);
0c530ab8 122
b0d623f7 123boolean_t smp_initialized = FALSE;
7e4a7d39 124uint32_t TSC_sync_margin = 0xFFF;
935ed37a
A
125volatile boolean_t force_immediate_debugger_NMI = FALSE;
126volatile boolean_t pmap_tlb_flush_timeout = FALSE;
5ba3f43e
A
127#if DEBUG || DEVELOPMENT
128boolean_t mp_interrupt_watchdog_enabled = TRUE;
129uint32_t mp_interrupt_watchdog_events = 0;
130#endif
91447636 131
39037602
A
132decl_simple_lock_data(,debugger_callback_lock);
133struct debugger_callback *debugger_callback = NULL;
134
b0d623f7
A
135decl_lck_mtx_data(static, mp_cpu_boot_lock);
136lck_mtx_ext_t mp_cpu_boot_lock_ext;
55e303ae
A
137
138/* Variables needed for MP rendezvous. */
0c530ab8 139decl_simple_lock_data(,mp_rv_lock);
b0d623f7
A
140static void (*mp_rv_setup_func)(void *arg);
141static void (*mp_rv_action_func)(void *arg);
142static void (*mp_rv_teardown_func)(void *arg);
143static void *mp_rv_func_arg;
144static volatile int mp_rv_ncpus;
0c530ab8
A
145 /* Cache-aligned barriers: */
146static volatile long mp_rv_entry __attribute__((aligned(64)));
147static volatile long mp_rv_exit __attribute__((aligned(64)));
148static volatile long mp_rv_complete __attribute__((aligned(64)));
55e303ae 149
b0d623f7
A
150volatile uint64_t debugger_entry_time;
151volatile uint64_t debugger_exit_time;
152#if MACH_KDP
7ddcb079 153#include <kdp/kdp.h>
d41d1dae 154extern int kdp_snapshot;
b0d623f7
A
155static struct _kdp_xcpu_call_func {
156 kdp_x86_xcpu_func_t func;
157 void *arg0, *arg1;
158 volatile long ret;
159 volatile uint16_t cpu;
160} kdp_xcpu_call_func = {
161 .cpu = KDP_XCPU_NONE
162};
163
164#endif
165
2d21ac55
A
166/* Variables needed for MP broadcast. */
167static void (*mp_bc_action_func)(void *arg);
168static void *mp_bc_func_arg;
593a1d5f 169static int mp_bc_ncpus;
2d21ac55 170static volatile long mp_bc_count;
b0d623f7
A
171decl_lck_mtx_data(static, mp_bc_lock);
172lck_mtx_ext_t mp_bc_lock_ext;
593a1d5f 173static volatile int debugger_cpu = -1;
39236c6e
A
174volatile long NMIPI_acks = 0;
175volatile long NMI_count = 0;
5ba3f43e
A
176static NMI_reason_t NMI_panic_reason = NONE;
177static int vector_timed_out;
39236c6e
A
178
179extern void NMI_cpus(void);
2d21ac55 180
6d2010ae 181static void mp_cpus_call_init(void);
2d21ac55 182static void mp_cpus_call_action(void);
c910b4d9 183static void mp_call_PM(void);
2d21ac55 184
b0d623f7
A
185char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
186
6d2010ae
A
187/* PAL-related routines */
188boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
189 int ipi_vector, i386_intr_func_t ipi_handler);
190void i386_start_cpu(int lapic_id, int cpu_num);
191void i386_send_NMI(int cpu);
b0d623f7 192
91447636
A
193#if GPROF
194/*
195 * Initialize dummy structs for profiling. These aren't used but
 196 * allow hertz_tick() to be built with GPROF defined.
197 */
198struct profile_vars _profile_vars;
199struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
200#define GPROF_INIT() \
201{ \
202 int i; \
203 \
204 /* Hack to initialize pointers to unused profiling structs */ \
205 for (i = 1; i < MAX_CPUS; i++) \
206 _profile_vars_cpus[i] = &_profile_vars; \
207}
208#else
209#define GPROF_INIT()
210#endif /* GPROF */
211
b0d623f7
A
212static lck_grp_t smp_lck_grp;
213static lck_grp_attr_t smp_lck_grp_attr;
214
6d2010ae
A
215#define NUM_CPU_WARM_CALLS 20
216struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
217queue_head_t cpu_warm_call_list;
218decl_simple_lock_data(static, cpu_warm_lock);
219
220typedef struct cpu_warm_data {
221 timer_call_t cwd_call;
222 uint64_t cwd_deadline;
223 int cwd_result;
224} *cpu_warm_data_t;
225
226static void cpu_prewarm_init(void);
227static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
228static void _cpu_warm_setup(void *arg);
229static timer_call_t grab_warm_timer_call(void);
230static void free_warm_timer_call(timer_call_t call);
b0d623f7 231
55e303ae
A
232void
233smp_init(void)
55e303ae 234{
91447636 235 simple_lock_init(&mp_rv_lock, 0);
39037602 236 simple_lock_init(&debugger_callback_lock, 0);
b0d623f7
A
237 lck_grp_attr_setdefault(&smp_lck_grp_attr);
238 lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
239 lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
240 lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
91447636 241 console_init();
55e303ae 242
6d2010ae
A
 243 if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
244 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
55e303ae
A
245 return;
246
91447636
A
247 cpu_thread_init();
248
91447636
A
249 GPROF_INIT();
250 DBGLOG_CPU_INIT(master_cpu);
251
6d2010ae 252 mp_cpus_call_init();
fe8ab488 253 mp_cpus_call_cpu_init(master_cpu);
55e303ae 254
5ba3f43e
A
255#if DEBUG || DEVELOPMENT
256 if (PE_parse_boot_argn("interrupt_watchdog",
257 &mp_interrupt_watchdog_enabled,
258 sizeof(mp_interrupt_watchdog_enabled))) {
259 kprintf("Interrupt watchdog %sabled\n",
260 mp_interrupt_watchdog_enabled ? "en" : "dis");
261 }
262#endif
263
7e4a7d39 264 if (PE_parse_boot_argn("TSC_sync_margin",
316670eb 265 &TSC_sync_margin, sizeof(TSC_sync_margin))) {
7e4a7d39 266 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
316670eb
A
267 } else if (cpuid_vmm_present()) {
268 kprintf("TSC sync margin disabled\n");
269 TSC_sync_margin = 0;
270 }
55e303ae
A
271 smp_initialized = TRUE;
272
6d2010ae
A
273 cpu_prewarm_init();
274
55e303ae
A
275 return;
276}
277
7e4a7d39
A
278typedef struct {
279 int target_cpu;
280 int target_lapic;
281 int starter_cpu;
282} processor_start_info_t;
283static processor_start_info_t start_info __attribute__((aligned(64)));
284
285/*
286 * Cache-alignment is to avoid cross-cpu false-sharing interference.
287 */
288static volatile long tsc_entry_barrier __attribute__((aligned(64)));
289static volatile long tsc_exit_barrier __attribute__((aligned(64)));
290static volatile uint64_t tsc_target __attribute__((aligned(64)));
291
0c530ab8 292/*
593a1d5f 293 * Poll a CPU to see when it has marked itself as running.
0c530ab8 294 */
593a1d5f
A
295static void
296mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
91447636 297{
7e4a7d39 298 while (iters-- > 0) {
593a1d5f 299 if (cpu_datap(slot_num)->cpu_running)
7e4a7d39 300 break;
593a1d5f 301 delay(usecdelay);
91447636 302 }
55e303ae
A
303}
304
b0d623f7
A
305/*
306 * Quickly bring a CPU back online which has been halted.
307 */
308kern_return_t
309intel_startCPU_fast(int slot_num)
310{
7e4a7d39 311 kern_return_t rc;
b0d623f7
A
312
313 /*
314 * Try to perform a fast restart
315 */
316 rc = pmCPUExitHalt(slot_num);
317 if (rc != KERN_SUCCESS)
318 /*
319 * The CPU was not eligible for a fast restart.
320 */
321 return(rc);
322
bd504ef0
A
323 KERNEL_DEBUG_CONSTANT(
324 TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
325 slot_num, 0, 0, 0, 0);
326
b0d623f7
A
327 /*
328 * Wait until the CPU is back online.
329 */
330 mp_disable_preemption();
331
332 /*
333 * We use short pauses (1us) for low latency. 30,000 iterations is
334 * longer than a full restart would require so it should be more
335 * than long enough.
336 */
6d2010ae 337
b0d623f7
A
338 mp_wait_for_cpu_up(slot_num, 30000, 1);
339 mp_enable_preemption();
340
bd504ef0
A
341 KERNEL_DEBUG_CONSTANT(
342 TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
343 slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
344
b0d623f7
A
345 /*
346 * Check to make sure that the CPU is really running. If not,
347 * go through the slow path.
348 */
349 if (cpu_datap(slot_num)->cpu_running)
350 return(KERN_SUCCESS);
7e4a7d39 351 else
b0d623f7
A
352 return(KERN_FAILURE);
353}
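/*
 * Illustrative usage sketch (not part of the original mp.c): how a caller
 * might combine the fast-restart path above with the full startup sequence
 * (intel_startCPU(), defined below in this file). The wrapper name
 * bring_cpu_online() is hypothetical.
 */
#if 0 /* example only, not compiled */
static kern_return_t
bring_cpu_online(int slot_num)
{
	/* Fast path succeeds only if the CPU was halted via pmCPUExitHalt() */
	if (intel_startCPU_fast(slot_num) == KERN_SUCCESS)
		return KERN_SUCCESS;

	/* Otherwise fall back to the full startup with descriptor init and rendezvous */
	return intel_startCPU(slot_num);
}
#endif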
354
7e4a7d39
A
355static void
356started_cpu(void)
357{
358 /* Here on the started cpu with cpu_running set TRUE */
c910b4d9 359
7e4a7d39
A
360 if (TSC_sync_margin &&
361 start_info.target_cpu == cpu_number()) {
362 /*
363 * I've just started-up, synchronize again with the starter cpu
364 * and then snap my TSC.
365 */
366 tsc_target = 0;
367 atomic_decl(&tsc_entry_barrier, 1);
368 while (tsc_entry_barrier != 0)
369 ; /* spin for starter and target at barrier */
370 tsc_target = rdtsc64();
371 atomic_decl(&tsc_exit_barrier, 1);
372 }
373}
c910b4d9
A
374
375static void
376start_cpu(void *arg)
377{
378 int i = 1000;
379 processor_start_info_t *psip = (processor_start_info_t *) arg;
380
381 /* Ignore this if the current processor is not the starter */
382 if (cpu_number() != psip->starter_cpu)
383 return;
384
bd504ef0
A
385 DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
386 arg, psip->target_cpu, psip->target_lapic);
387
388 KERNEL_DEBUG_CONSTANT(
389 TRACE_MP_CPU_START | DBG_FUNC_START,
390 psip->target_cpu,
391 psip->target_lapic, 0, 0, 0);
392
6d2010ae 393 i386_start_cpu(psip->target_lapic, psip->target_cpu);
c910b4d9
A
394
395#ifdef POSTCODE_DELAY
396 /* Wait much longer if postcodes are displayed for a delay period. */
397 i *= 10000;
398#endif
bd504ef0
A
399 DBG("start_cpu(%p) about to wait for cpu %d\n",
400 arg, psip->target_cpu);
401
c910b4d9 402 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
bd504ef0
A
403
404 KERNEL_DEBUG_CONSTANT(
405 TRACE_MP_CPU_START | DBG_FUNC_END,
406 psip->target_cpu,
407 cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
408
7e4a7d39
A
409 if (TSC_sync_margin &&
410 cpu_datap(psip->target_cpu)->cpu_running) {
411 /*
412 * Compare the TSC from the started processor with ours.
413 * Report and log/panic if it diverges by more than
414 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
 415 * can be overridden by boot-arg (with 0 meaning no checking).
416 */
417 uint64_t tsc_starter;
418 int64_t tsc_delta;
419 atomic_decl(&tsc_entry_barrier, 1);
420 while (tsc_entry_barrier != 0)
421 ; /* spin for both processors at barrier */
422 tsc_starter = rdtsc64();
423 atomic_decl(&tsc_exit_barrier, 1);
424 while (tsc_exit_barrier != 0)
425 ; /* spin for target to store its TSC */
426 tsc_delta = tsc_target - tsc_starter;
427 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
428 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
429 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
430#if DEBUG
431 panic(
432#else
433 printf(
434#endif
435 "Unsynchronized TSC for cpu %d: "
436 "0x%016llx, delta 0x%llx\n",
437 psip->target_cpu, tsc_target, tsc_delta);
438 }
439 }
c910b4d9
A
440}
441
55e303ae
A
442kern_return_t
443intel_startCPU(
444 int slot_num)
445{
c910b4d9
A
446 int lapic = cpu_to_lapic[slot_num];
447 boolean_t istate;
55e303ae 448
91447636
A
449 assert(lapic != -1);
450
451 DBGLOG_CPU_INIT(slot_num);
55e303ae 452
91447636 453 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
6d2010ae 454 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
55e303ae 455
0c530ab8
A
456 /*
457 * Initialize (or re-initialize) the descriptor tables for this cpu.
458 * Propagate processor mode to slave.
459 */
5ba3f43e 460 cpu_desc_init(cpu_datap(slot_num));
91447636 461
c910b4d9 462 /* Serialize use of the slave boot stack, etc. */
b0d623f7 463 lck_mtx_lock(&mp_cpu_boot_lock);
55e303ae 464
c910b4d9 465 istate = ml_set_interrupts_enabled(FALSE);
91447636 466 if (slot_num == get_cpu_number()) {
c910b4d9 467 ml_set_interrupts_enabled(istate);
b0d623f7 468 lck_mtx_unlock(&mp_cpu_boot_lock);
91447636
A
469 return KERN_SUCCESS;
470 }
55e303ae 471
b0d623f7
A
472 start_info.starter_cpu = cpu_number();
473 start_info.target_cpu = slot_num;
c910b4d9 474 start_info.target_lapic = lapic;
7e4a7d39
A
475 tsc_entry_barrier = 2;
476 tsc_exit_barrier = 2;
55e303ae 477
c910b4d9 478 /*
b0d623f7 479 * Perform the processor startup sequence with all running
c910b4d9
A
480 * processors rendezvous'ed. This is required during periods when
481 * the cache-disable bit is set for MTRR/PAT initialization.
482 */
b0d623f7 483 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
55e303ae 484
7e4a7d39
A
485 start_info.target_cpu = 0;
486
c910b4d9 487 ml_set_interrupts_enabled(istate);
b0d623f7 488 lck_mtx_unlock(&mp_cpu_boot_lock);
55e303ae 489
91447636 490 if (!cpu_datap(slot_num)->cpu_running) {
0c530ab8 491 kprintf("Failed to start CPU %02d\n", slot_num);
91447636
A
492 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
493 delay(1000000);
b0d623f7 494 halt_cpu();
55e303ae
A
495 return KERN_SUCCESS;
496 } else {
2d21ac55 497 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
55e303ae
A
498 return KERN_SUCCESS;
499 }
500}
501
55e303ae 502#if MP_DEBUG
91447636
A
503cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
504cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
55e303ae
A
505
506MP_EVENT_NAME_DECL();
507
55e303ae
A
508#endif /* MP_DEBUG */
509
fe8ab488
A
510/*
511 * Note: called with NULL state when polling for TLB flush and cross-calls.
512 */
593a1d5f 513int
0c530ab8 514cpu_signal_handler(x86_saved_state_t *regs)
55e303ae 515{
39236c6e
A
516#if !MACH_KDP
517#pragma unused (regs)
518#endif /* !MACH_KDP */
91447636 519 int my_cpu;
55e303ae 520 volatile int *my_word;
55e303ae 521
6d2010ae 522 SCHED_STATS_IPI(current_processor());
55e303ae
A
523
524 my_cpu = cpu_number();
060df5ea
A
525 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
526 /* Store the initial set of signals for diagnostics. New
527 * signals could arrive while these are being processed
528 * so it's no more than a hint.
529 */
6d2010ae 530
060df5ea 531 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
55e303ae
A
532
533 do {
55e303ae 534#if MACH_KDP
fe8ab488 535 if (i_bit(MP_KDP, my_word)) {
55e303ae
A
536 DBGLOG(cpu_handle,my_cpu,MP_KDP);
537 i_bit_clear(MP_KDP, my_word);
0c530ab8
A
538/* Ensure that the i386_kernel_state at the base of the
539 * current thread's stack (if any) is synchronized with the
540 * context at the moment of the interrupt, to facilitate
541 * access through the debugger.
0c530ab8 542 */
b0d623f7 543 sync_iss_to_iks(regs);
d41d1dae
A
544 if (pmsafe_debug && !kdp_snapshot)
545 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
b0d623f7 546 mp_kdp_wait(TRUE, FALSE);
d41d1dae
A
547 if (pmsafe_debug && !kdp_snapshot)
548 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
55e303ae
A
549 } else
550#endif /* MACH_KDP */
91447636 551 if (i_bit(MP_TLB_FLUSH, my_word)) {
55e303ae
A
552 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
553 i_bit_clear(MP_TLB_FLUSH, my_word);
554 pmap_update_interrupt();
0c530ab8
A
555 } else if (i_bit(MP_CHUD, my_word)) {
556 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
557 i_bit_clear(MP_CHUD, my_word);
558 chudxnu_cpu_signal_handler();
2d21ac55
A
559 } else if (i_bit(MP_CALL, my_word)) {
560 DBGLOG(cpu_handle,my_cpu,MP_CALL);
561 i_bit_clear(MP_CALL, my_word);
562 mp_cpus_call_action();
c910b4d9
A
563 } else if (i_bit(MP_CALL_PM, my_word)) {
564 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
565 i_bit_clear(MP_CALL_PM, my_word);
566 mp_call_PM();
55e303ae 567 }
fe8ab488
A
568 if (regs == NULL) {
569 /* Called to poll only for cross-calls and TLB flush */
570 break;
571 } else if (i_bit(MP_AST, my_word)) {
572 DBGLOG(cpu_handle,my_cpu,MP_AST);
573 i_bit_clear(MP_AST, my_word);
574 ast_check(cpu_to_processor(my_cpu));
575 }
55e303ae
A
576 } while (*my_word);
577
593a1d5f 578 return 0;
55e303ae
A
579}
580
fe8ab488 581extern void kprintf_break_lock(void);
593a1d5f 582static int
2d21ac55 583NMIInterruptHandler(x86_saved_state_t *regs)
0c530ab8 584{
fe8ab488 585 void *stackptr;
5ba3f43e
A
586 char pstr[192];
587 uint64_t now = mach_absolute_time();
060df5ea 588
6d2010ae
A
589 if (panic_active() && !panicDebugging) {
590 if (pmsafe_debug)
591 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
592 for(;;)
593 cpu_pause();
594 }
595
060df5ea 596 atomic_incl(&NMIPI_acks, 1);
39236c6e 597 atomic_incl(&NMI_count, 1);
0c530ab8 598 sync_iss_to_iks_unconditionally(regs);
b0d623f7 599 __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
935ed37a 600
593a1d5f 601 if (cpu_number() == debugger_cpu)
fe8ab488 602 goto NMExit;
593a1d5f 603
5ba3f43e
A
604 if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
605 snprintf(&pstr[0], sizeof(pstr),
606 "Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
607 cpu_number(), now, spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
fe8ab488 608 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
5ba3f43e
A
609 } else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
610 snprintf(&pstr[0], sizeof(pstr),
611 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
612 cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
6d2010ae 613 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
5ba3f43e
A
614 } else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
615 snprintf(&pstr[0], sizeof(pstr),
616 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
617 cpu_number(), now);
618 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
619 } else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
620 snprintf(&pstr[0], sizeof(pstr),
621 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
622 cpu_number(), now, vector_timed_out);
623 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
624 }
625
b0d623f7 626#if MACH_KDP
d41d1dae
A
627 if (pmsafe_debug && !kdp_snapshot)
628 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
060df5ea 629 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
15129b1c 630 i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
5ba3f43e 631 if (panic_active() || NMI_panic_reason != NONE) {
fe8ab488 632 mp_kdp_wait(FALSE, TRUE);
39037602
A
633 } else if (!mp_kdp_trap &&
634 !mp_kdp_is_NMI &&
635 virtualized && (debug_boot_arg & DB_NMI)) {
fe8ab488
A
636 /*
637 * Under a VMM with the debug boot-arg set, drop into kdp.
638 * Since an NMI is involved, there's a risk of contending with
639 * a panic. And side-effects of NMIs may result in entry into,
640 * and continuing from, the debugger being unreliable.
641 */
39037602
A
642 if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
643 kprintf_break_lock();
644 kprintf("Debugger entry requested by NMI\n");
645 kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
646 printf("Debugger entry requested by NMI\n");
647 mp_kdp_is_NMI = FALSE;
648 } else {
649 mp_kdp_wait(FALSE, FALSE);
650 }
fe8ab488
A
651 } else {
652 mp_kdp_wait(FALSE, FALSE);
653 }
d41d1dae
A
654 if (pmsafe_debug && !kdp_snapshot)
655 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
b0d623f7 656#endif
593a1d5f 657NMExit:
0c530ab8
A
658 return 1;
659}
660
2d21ac55
A
661
662/*
663 * cpu_interrupt is really just to be used by the scheduler to
 664 * get a CPU's attention; it may not always issue an IPI. If an
665 * IPI is always needed then use i386_cpu_IPI.
666 */
667void
668cpu_interrupt(int cpu)
669{
6d2010ae
A
670 boolean_t did_IPI = FALSE;
671
2d21ac55
A
672 if (smp_initialized
673 && pmCPUExitIdle(cpu_datap(cpu))) {
674 i386_cpu_IPI(cpu);
6d2010ae 675 did_IPI = TRUE;
2d21ac55 676 }
6d2010ae
A
677
678 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
55e303ae
A
679}
680
0c530ab8
A
681/*
682 * Send a true NMI via the local APIC to the specified CPU.
683 */
935ed37a 684void
0c530ab8
A
685cpu_NMI_interrupt(int cpu)
686{
0c530ab8 687 if (smp_initialized) {
6d2010ae 688 i386_send_NMI(cpu);
0c530ab8 689 }
0c530ab8
A
690}
691
39236c6e
A
692void
693NMI_cpus(void)
694{
695 unsigned int cpu;
696 boolean_t intrs_enabled;
697 uint64_t tsc_timeout;
698
699 intrs_enabled = ml_set_interrupts_enabled(FALSE);
700
701 for (cpu = 0; cpu < real_ncpus; cpu++) {
702 if (!cpu_datap(cpu)->cpu_running)
703 continue;
704 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
705 cpu_NMI_interrupt(cpu);
706 tsc_timeout = !machine_timeout_suspended() ?
707 rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
708 ~0ULL;
709 while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
710 handle_pending_TLB_flushes();
711 cpu_pause();
712 if (rdtsc64() > tsc_timeout)
713 panic("NMI_cpus() timeout cpu %d", cpu);
714 }
715 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
716 }
717
718 ml_set_interrupts_enabled(intrs_enabled);
719}
720
b0d623f7 721static void (* volatile mp_PM_func)(void) = NULL;
c910b4d9
A
722
723static void
724mp_call_PM(void)
725{
726 assert(!ml_get_interrupts_enabled());
727
728 if (mp_PM_func != NULL)
729 mp_PM_func();
730}
731
732void
733cpu_PM_interrupt(int cpu)
734{
735 assert(!ml_get_interrupts_enabled());
736
737 if (mp_PM_func != NULL) {
738 if (cpu == cpu_number())
739 mp_PM_func();
740 else
741 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
742 }
743}
744
745void
746PM_interrupt_register(void (*fn)(void))
747{
748 mp_PM_func = fn;
749}
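/*
 * Illustrative usage sketch (not part of the original mp.c): registering a
 * power-management callback and triggering it on another CPU. The handler
 * name my_pm_handler() is hypothetical; cpu_PM_interrupt() must be called
 * with interrupts disabled and either runs the handler locally or sends an
 * MP_CALL_PM IPI, as implemented above.
 */
#if 0 /* example only, not compiled */
static void
my_pm_handler(void)
{
	/* Runs with interrupts disabled on the target CPU */
}

static void
pm_poke_cpu(int cpu)
{
	boolean_t istate;

	PM_interrupt_register(my_pm_handler);

	istate = ml_set_interrupts_enabled(FALSE);
	cpu_PM_interrupt(cpu);		/* local call or MP_CALL_PM IPI */
	ml_set_interrupts_enabled(istate);
}
#endif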
750
55e303ae
A
751void
752i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
753{
91447636
A
754 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
755 uint64_t tsc_timeout;
6601e61a 756
0c530ab8 757
91447636 758 if (!cpu_datap(cpu)->cpu_running)
55e303ae
A
759 return;
760
0c530ab8 761 if (event == MP_TLB_FLUSH)
6d2010ae 762 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
6601e61a 763
0c530ab8
A
764 DBGLOG(cpu_signal, cpu, event);
765
55e303ae 766 i_bit_set(event, signals);
2d21ac55 767 i386_cpu_IPI(cpu);
55e303ae
A
768 if (mode == SYNC) {
769 again:
39236c6e
A
770 tsc_timeout = !machine_timeout_suspended() ?
771 rdtsc64() + (1000*1000*1000) :
772 ~0ULL;
91447636 773 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
55e303ae
A
774 cpu_pause();
775 }
776 if (i_bit(event, signals)) {
777 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
778 cpu, event);
779 goto again;
780 }
781 }
0c530ab8 782 if (event == MP_TLB_FLUSH)
6d2010ae 783 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
55e303ae
A
784}
785
39236c6e
A
786/*
 787 * Helper function called when busy-waiting: returns TRUE (so the caller
 788 * can panic) if too long a TSC-based interval has elapsed since the start of the spin.
789 */
fe8ab488
A
790static boolean_t
791mp_spin_timeout(uint64_t tsc_start)
39236c6e
A
792{
793 uint64_t tsc_timeout;
794
795 cpu_pause();
796 if (machine_timeout_suspended())
fe8ab488 797 return FALSE;
39236c6e
A
798
799 /*
800 * The timeout is 4 * the spinlock timeout period
801 * unless we have serial console printing (kprintf) enabled
802 * in which case we allow an even greater margin.
803 */
39037602
A
804 tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
805 : LockTimeOutTSC << 4;
fe8ab488
A
806 return (rdtsc64() > tsc_start + tsc_timeout);
807}
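/*
 * Illustrative usage sketch (not part of the original mp.c): the busy-wait
 * pattern used with mp_spin_timeout() elsewhere in this file. Since the
 * helper is static, such a loop would have to live in this file; the flag
 * being waited on is a placeholder.
 */
#if 0 /* example only, not compiled */
static void
example_wait_for_flag(volatile boolean_t *flag)
{
	uint64_t tsc_spin_start = rdtsc64();

	while (!*flag) {
		/* mp_spin_timeout() does the cpu_pause() and TSC deadline check */
		if (mp_spin_timeout(tsc_spin_start))
			panic("example: spin timed out, start: 0x%llx, cur: 0x%llx",
			    tsc_spin_start, rdtsc64());
	}
}
#endif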
808
809/*
810 * Helper function to take a spinlock while ensuring that incoming IPIs
811 * are still serviced if interrupts are masked while we spin.
39037602 812 * Returns current interrupt state.
fe8ab488 813 */
5ba3f43e 814boolean_t
fe8ab488
A
815mp_safe_spin_lock(usimple_lock_t lock)
816{
817 if (ml_get_interrupts_enabled()) {
818 simple_lock(lock);
819 return TRUE;
820 } else {
821 uint64_t tsc_spin_start = rdtsc64();
822 while (!simple_lock_try(lock)) {
823 cpu_signal_handler(NULL);
824 if (mp_spin_timeout(tsc_spin_start)) {
825 uint32_t lock_cpu;
826 uintptr_t lowner = (uintptr_t)
827 lock->interlock.lock_data;
828 spinlock_timed_out = lock;
829 lock_cpu = spinlock_timeout_NMI(lowner);
5ba3f43e
A
830 NMIPI_panic(cpu_to_cpumask(lock_cpu), SPINLOCK_TIMEOUT);
831 panic("mp_safe_spin_lock() timed out, lock: %p, owner thread: 0x%lx, current_thread: %p, owner on CPU 0x%x, time: %llu",
832 lock, lowner, current_thread(), lock_cpu, mach_absolute_time());
fe8ab488
A
833 }
834 }
835 return FALSE;
836 }
39236c6e
A
837}
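/*
 * Illustrative usage sketch (not part of the original mp.c): the calling
 * pattern for mp_safe_spin_lock(), mirroring its uses below (e.g.
 * i386_activate_cpu()). The lock is dropped with simple_unlock(); the
 * return value only reports whether interrupts were enabled on entry.
 */
#if 0 /* example only, not compiled */
static void
example_topo_locked_work(void)
{
	mp_safe_spin_lock(&x86_topo_lock);
	/* ... critical section; incoming IPIs were still polled while spinning ... */
	simple_unlock(&x86_topo_lock);
}
#endif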
838
55e303ae
A
839/*
840 * All-CPU rendezvous:
841 * - CPUs are signalled,
842 * - all execute the setup function (if specified),
843 * - rendezvous (i.e. all cpus reach a barrier),
844 * - all execute the action function (if specified),
845 * - rendezvous again,
846 * - execute the teardown function (if specified), and then
847 * - resume.
848 *
849 * Note that the supplied external functions _must_ be reentrant and aware
850 * that they are running in parallel and in an unknown lock context.
851 */
852
853static void
39037602 854mp_rendezvous_action(__unused void *null)
55e303ae 855{
39236c6e
A
856 boolean_t intrs_enabled;
857 uint64_t tsc_spin_start;
55e303ae
A
858
859 /* setup function */
860 if (mp_rv_setup_func != NULL)
861 mp_rv_setup_func(mp_rv_func_arg);
2d21ac55
A
862
863 intrs_enabled = ml_get_interrupts_enabled();
864
55e303ae 865 /* spin on entry rendezvous */
0c530ab8 866 atomic_incl(&mp_rv_entry, 1);
39236c6e 867 tsc_spin_start = rdtsc64();
490019cf 868
0c530ab8 869 while (mp_rv_entry < mp_rv_ncpus) {
2d21ac55
A
870 /* poll for pesky tlb flushes if interrupts disabled */
871 if (!intrs_enabled)
872 handle_pending_TLB_flushes();
490019cf
A
873 if (mp_spin_timeout(tsc_spin_start)) {
874 panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
875 }
0c530ab8 876 }
6d2010ae 877
55e303ae
A
878 /* action function */
879 if (mp_rv_action_func != NULL)
880 mp_rv_action_func(mp_rv_func_arg);
6d2010ae 881
55e303ae 882 /* spin on exit rendezvous */
0c530ab8 883 atomic_incl(&mp_rv_exit, 1);
39236c6e 884 tsc_spin_start = rdtsc64();
2d21ac55
A
885 while (mp_rv_exit < mp_rv_ncpus) {
886 if (!intrs_enabled)
887 handle_pending_TLB_flushes();
fe8ab488 888 if (mp_spin_timeout(tsc_spin_start))
490019cf 889 panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
2d21ac55 890 }
6d2010ae 891
55e303ae
A
892 /* teardown function */
893 if (mp_rv_teardown_func != NULL)
894 mp_rv_teardown_func(mp_rv_func_arg);
0c530ab8
A
895
896 /* Bump completion count */
897 atomic_incl(&mp_rv_complete, 1);
55e303ae
A
898}
899
900void
901mp_rendezvous(void (*setup_func)(void *),
902 void (*action_func)(void *),
903 void (*teardown_func)(void *),
904 void *arg)
905{
39236c6e 906 uint64_t tsc_spin_start;
55e303ae
A
907
908 if (!smp_initialized) {
909 if (setup_func != NULL)
910 setup_func(arg);
911 if (action_func != NULL)
912 action_func(arg);
913 if (teardown_func != NULL)
914 teardown_func(arg);
915 return;
916 }
917
918 /* obtain rendezvous lock */
fe8ab488 919 (void) mp_safe_spin_lock(&mp_rv_lock);
55e303ae
A
920
921 /* set static function pointers */
922 mp_rv_setup_func = setup_func;
923 mp_rv_action_func = action_func;
924 mp_rv_teardown_func = teardown_func;
925 mp_rv_func_arg = arg;
926
0c530ab8
A
927 mp_rv_entry = 0;
928 mp_rv_exit = 0;
929 mp_rv_complete = 0;
55e303ae
A
930
931 /*
932 * signal other processors, which will call mp_rendezvous_action()
2d21ac55 933 * with interrupts disabled
55e303ae 934 */
39037602 935 mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;
55e303ae
A
936
937 /* call executor function on this cpu */
39037602 938 mp_rendezvous_action(NULL);
55e303ae 939
0c530ab8
A
940 /*
941 * Spin for everyone to complete.
942 * This is necessary to ensure that all processors have proceeded
943 * from the exit barrier before we release the rendezvous structure.
944 */
39236c6e 945 tsc_spin_start = rdtsc64();
0c530ab8 946 while (mp_rv_complete < mp_rv_ncpus) {
fe8ab488 947 if (mp_spin_timeout(tsc_spin_start))
490019cf 948 panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
0c530ab8
A
949 }
950
2d21ac55
A
951 /* Tidy up */
952 mp_rv_setup_func = NULL;
953 mp_rv_action_func = NULL;
954 mp_rv_teardown_func = NULL;
955 mp_rv_func_arg = NULL;
956
55e303ae
A
957 /* release lock */
958 simple_unlock(&mp_rv_lock);
959}
960
0c530ab8
A
961void
962mp_rendezvous_break_lock(void)
963{
964 simple_lock_init(&mp_rv_lock, 0);
965}
966
967static void
968setup_disable_intrs(__unused void * param_not_used)
969{
970 /* disable interrupts before the first barrier */
971 boolean_t intr = ml_set_interrupts_enabled(FALSE);
972
973 current_cpu_datap()->cpu_iflag = intr;
974 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
975}
976
977static void
978teardown_restore_intrs(__unused void * param_not_used)
979{
980 /* restore interrupt flag following MTRR changes */
981 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
982 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
983}
984
985/*
986 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
987 * This is exported for use by kexts.
988 */
989void
990mp_rendezvous_no_intrs(
991 void (*action_func)(void *),
992 void *arg)
993{
994 mp_rendezvous(setup_disable_intrs,
995 action_func,
996 teardown_restore_intrs,
997 arg);
998}
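/*
 * Illustrative usage sketch (not part of the original mp.c): a kext-style
 * use of mp_rendezvous_no_intrs() to run an action on every CPU at the same
 * time with interrupts disabled (the pattern used for MTRR/PAT updates).
 * The names write_my_msr(), my_msr_arg_t and set_msr_everywhere() are
 * hypothetical; wrmsr64() is assumed from <i386/proc_reg.h>.
 */
#if 0 /* example only, not compiled */
typedef struct {
	uint32_t	msr;
	uint64_t	value;
} my_msr_arg_t;

static void
write_my_msr(void *arg)
{
	my_msr_arg_t	*ap = (my_msr_arg_t *) arg;

	/* Executes on every CPU, interrupts disabled, all CPUs rendezvous'ed */
	wrmsr64(ap->msr, ap->value);
}

static void
set_msr_everywhere(uint32_t msr, uint64_t value)
{
	my_msr_arg_t	arg = { .msr = msr, .value = value };

	mp_rendezvous_no_intrs(write_my_msr, &arg);
}
#endif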
999
6d2010ae
A
1000
1001typedef struct {
1002 queue_chain_t link; /* queue linkage */
1003 void (*func)(void *,void *); /* routine to call */
1004 void *arg0; /* routine's 1st arg */
1005 void *arg1; /* routine's 2nd arg */
fe8ab488 1006 cpumask_t *maskp; /* completion response mask */
6d2010ae 1007} mp_call_t;
316670eb
A
1008
1009
1010typedef struct {
1011 queue_head_t queue;
1012 decl_simple_lock_data(, lock);
1013} mp_call_queue_t;
6d2010ae 1014#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
316670eb
A
1015static mp_call_queue_t mp_cpus_call_freelist;
1016static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
6d2010ae
A
1017
1018static inline boolean_t
316670eb 1019mp_call_head_lock(mp_call_queue_t *cqp)
6d2010ae
A
1020{
1021 boolean_t intrs_enabled;
1022
1023 intrs_enabled = ml_set_interrupts_enabled(FALSE);
316670eb 1024 simple_lock(&cqp->lock);
6d2010ae
A
1025
1026 return intrs_enabled;
1027}
1028
5ba3f43e
A
1029/*
 1030 * Deliver an NMIPI to a set of processors to cause them to panic.
1031 */
fe8ab488 1032void
5ba3f43e 1033NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why) {
fe8ab488
A
1034 unsigned int cpu, cpu_bit;
1035 uint64_t deadline;
1036
5ba3f43e
A
1037 NMI_panic_reason = why;
1038
fe8ab488 1039 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
5ba3f43e
A
1040 if ((cpu_mask & cpu_bit) == 0)
1041 continue;
1042 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
1043 cpu_NMI_interrupt(cpu);
1044 }
1045
 1046 /* Wait (only so long) for NMI'ed cpus to respond */
1047 deadline = mach_absolute_time() + LockTimeOut;
1048 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1049 if ((cpu_mask & cpu_bit) == 0)
1050 continue;
1051 while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
1052 mach_absolute_time() < deadline) {
1053 cpu_pause();
1054 }
fe8ab488 1055 }
fe8ab488
A
1056}
1057
1058#if MACH_ASSERT
6d2010ae 1059static inline boolean_t
316670eb 1060mp_call_head_is_locked(mp_call_queue_t *cqp)
6d2010ae
A
1061{
1062 return !ml_get_interrupts_enabled() &&
316670eb 1063 hw_lock_held((hw_lock_t)&cqp->lock);
6d2010ae 1064}
fe8ab488 1065#endif
6d2010ae
A
1066
1067static inline void
316670eb 1068mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
6d2010ae 1069{
316670eb 1070 simple_unlock(&cqp->lock);
6d2010ae
A
1071 ml_set_interrupts_enabled(intrs_enabled);
1072}
1073
1074static inline mp_call_t *
1075mp_call_alloc(void)
1076{
316670eb
A
1077 mp_call_t *callp = NULL;
1078 boolean_t intrs_enabled;
1079 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1080
1081 intrs_enabled = mp_call_head_lock(cqp);
1082 if (!queue_empty(&cqp->queue))
1083 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1084 mp_call_head_unlock(cqp, intrs_enabled);
6d2010ae 1085
6d2010ae
A
1086 return callp;
1087}
1088
1089static inline void
1090mp_call_free(mp_call_t *callp)
0c530ab8 1091{
316670eb
A
1092 boolean_t intrs_enabled;
1093 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1094
1095 intrs_enabled = mp_call_head_lock(cqp);
1096 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
1097 mp_call_head_unlock(cqp, intrs_enabled);
6d2010ae
A
1098}
1099
1100static inline mp_call_t *
316670eb 1101mp_call_dequeue_locked(mp_call_queue_t *cqp)
6d2010ae 1102{
316670eb 1103 mp_call_t *callp = NULL;
0c530ab8 1104
316670eb
A
1105 assert(mp_call_head_is_locked(cqp));
1106 if (!queue_empty(&cqp->queue))
1107 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
6d2010ae
A
1108 return callp;
1109}
1110
316670eb
A
1111static inline void
1112mp_call_enqueue_locked(
1113 mp_call_queue_t *cqp,
1114 mp_call_t *callp)
1115{
1116 queue_enter(&cqp->queue, callp, typeof(callp), link);
1117}
1118
6d2010ae
A
1119/* Called on the boot processor to initialize global structures */
1120static void
1121mp_cpus_call_init(void)
1122{
316670eb
A
1123 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1124
6d2010ae 1125 DBG("mp_cpus_call_init()\n");
316670eb
A
1126 simple_lock_init(&cqp->lock, 0);
1127 queue_init(&cqp->queue);
6d2010ae
A
1128}
1129
1130/*
fe8ab488 1131 * Called at processor registration to add call buffers to the free list
6d2010ae 1132 * and to initialize the per-cpu call queue.
6d2010ae 1133 */
fe8ab488
A
1134void
1135mp_cpus_call_cpu_init(int cpu)
6d2010ae 1136{
6d2010ae 1137 int i;
fe8ab488 1138 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
6d2010ae
A
1139 mp_call_t *callp;
1140
316670eb
A
1141 simple_lock_init(&cqp->lock, 0);
1142 queue_init(&cqp->queue);
6d2010ae
A
1143 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1144 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
6d2010ae 1145 mp_call_free(callp);
0c530ab8 1146 }
6d2010ae 1147
fe8ab488 1148 DBG("mp_cpus_call_cpu_init(%d) done\n", cpu);
0c530ab8
A
1149}
1150
2d21ac55
A
1151/*
1152 * This is called from cpu_signal_handler() to process an MP_CALL signal.
6d2010ae 1153 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
2d21ac55
A
1154 */
1155static void
1156mp_cpus_call_action(void)
1157{
316670eb 1158 mp_call_queue_t *cqp;
6d2010ae
A
1159 boolean_t intrs_enabled;
1160 mp_call_t *callp;
1161 mp_call_t call;
1162
1163 assert(!ml_get_interrupts_enabled());
316670eb
A
1164 cqp = &mp_cpus_call_head[cpu_number()];
1165 intrs_enabled = mp_call_head_lock(cqp);
1166 while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
6d2010ae
A
1167 /* Copy call request to the stack to free buffer */
1168 call = *callp;
1169 mp_call_free(callp);
1170 if (call.func != NULL) {
316670eb 1171 mp_call_head_unlock(cqp, intrs_enabled);
6d2010ae
A
1172 KERNEL_DEBUG_CONSTANT(
1173 TRACE_MP_CPUS_CALL_ACTION,
4bd07ac2
A
1174 VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
1175 VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
6d2010ae 1176 call.func(call.arg0, call.arg1);
316670eb 1177 (void) mp_call_head_lock(cqp);
6d2010ae 1178 }
fe8ab488
A
1179 if (call.maskp != NULL)
1180 i_bit_set(cpu_number(), call.maskp);
6d2010ae 1181 }
316670eb 1182 mp_call_head_unlock(cqp, intrs_enabled);
2d21ac55
A
1183}
1184
1185/*
1186 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
6d2010ae
A
1187 * Possible modes are:
1188 * SYNC: function is called serially on target cpus in logical cpu order
1189 * waiting for each call to be acknowledged before proceeding
1190 * ASYNC: function call is queued to the specified cpus
1191 * waiting for all calls to complete in parallel before returning
1192 * NOSYNC: function calls are queued
1193 * but we return before confirmation of calls completing.
2d21ac55
A
1194 * The action function may be NULL.
1195 * The cpu mask may include the local cpu. Offline cpus are ignored.
6d2010ae 1196 * The return value is the number of cpus on which the call was made or queued.
2d21ac55
A
1197 */
1198cpu_t
1199mp_cpus_call(
1200 cpumask_t cpus,
1201 mp_sync_t mode,
1202 void (*action_func)(void *),
1203 void *arg)
6d2010ae
A
1204{
1205 return mp_cpus_call1(
1206 cpus,
1207 mode,
1208 (void (*)(void *,void *))action_func,
1209 arg,
1210 NULL,
6d2010ae
A
1211 NULL);
1212}
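/*
 * Illustrative usage sketch (not part of the original mp.c): running an
 * action on a set of CPUs with mp_cpus_call(). SYNC returns only after
 * every targeted CPU has executed the action, per the description above.
 * The action count_me() and wrapper poke_all_cpus() are hypothetical.
 */
#if 0 /* example only, not compiled */
static void
count_me(void *arg)
{
	atomic_incl((volatile long *) arg, 1);
}

static void
poke_all_cpus(void)
{
	long	count = 0;
	cpu_t	ncalled;

	/* Run count_me() on every running CPU, including this one */
	ncalled = mp_cpus_call(CPUMASK_OTHERS | CPUMASK_SELF, SYNC,
	    count_me, (void *) &count);
	kprintf("count_me() ran on %d cpus, count %ld\n", (int) ncalled, count);
}
#endif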
1213
1214static void
316670eb 1215mp_cpus_call_wait(boolean_t intrs_enabled,
fe8ab488
A
1216 cpumask_t cpus_called,
1217 cpumask_t *cpus_responded)
6d2010ae 1218{
316670eb 1219 mp_call_queue_t *cqp;
39236c6e 1220 uint64_t tsc_spin_start;
6d2010ae 1221
39037602 1222 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
316670eb 1223 cqp = &mp_cpus_call_head[cpu_number()];
6d2010ae 1224
39236c6e 1225 tsc_spin_start = rdtsc64();
fe8ab488 1226 while (*cpus_responded != cpus_called) {
6d2010ae 1227 if (!intrs_enabled) {
316670eb
A
1228 /* Sniffing w/o locking */
1229 if (!queue_empty(&cqp->queue))
6d2010ae 1230 mp_cpus_call_action();
fe8ab488
A
1231 cpu_signal_handler(NULL);
1232 }
1233 if (mp_spin_timeout(tsc_spin_start)) {
1234 cpumask_t cpus_unresponsive;
1235
fe8ab488 1236 cpus_unresponsive = cpus_called & ~(*cpus_responded);
5ba3f43e 1237 NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
3e170ce0 1238 panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
fe8ab488 1239 cpus_unresponsive);
6d2010ae 1240 }
6d2010ae
A
1241 }
1242}
1243
1244cpu_t
1245mp_cpus_call1(
1246 cpumask_t cpus,
1247 mp_sync_t mode,
1248 void (*action_func)(void *, void *),
1249 void *arg0,
1250 void *arg1,
39037602 1251 cpumask_t *cpus_calledp)
2d21ac55 1252{
39037602 1253 cpu_t cpu = 0;
6d2010ae 1254 boolean_t intrs_enabled = FALSE;
2d21ac55 1255 boolean_t call_self = FALSE;
6d2010ae 1256 cpumask_t cpus_called = 0;
fe8ab488
A
1257 cpumask_t cpus_responded = 0;
1258 long cpus_call_count = 0;
39236c6e 1259 uint64_t tsc_spin_start;
fe8ab488 1260 boolean_t topo_lock;
6d2010ae
A
1261
1262 KERNEL_DEBUG_CONSTANT(
1263 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
4bd07ac2 1264 cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
2d21ac55
A
1265
1266 if (!smp_initialized) {
1267 if ((cpus & CPUMASK_SELF) == 0)
6d2010ae 1268 goto out;
2d21ac55 1269 if (action_func != NULL) {
6d2010ae
A
1270 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1271 action_func(arg0, arg1);
2d21ac55
A
1272 ml_set_interrupts_enabled(intrs_enabled);
1273 }
6d2010ae
A
1274 call_self = TRUE;
1275 goto out;
2d21ac55 1276 }
2d21ac55 1277
6d2010ae
A
1278 /*
1279 * Queue the call for each non-local requested cpu.
fe8ab488
A
1280 * This is performed under the topo lock to prevent changes to
1281 * cpus online state and to prevent concurrent rendezvouses --
1282 * although an exception is made if we're calling only the master
1283 * processor since that always remains active. Note: this exception
1284 * is expected for longterm timer nosync cross-calls to the master cpu.
6d2010ae 1285 */
fe8ab488
A
1286 mp_disable_preemption();
1287 intrs_enabled = ml_get_interrupts_enabled();
1288 topo_lock = (cpus != cpu_to_cpumask(master_cpu));
1289 if (topo_lock) {
1290 ml_set_interrupts_enabled(FALSE);
1291 (void) mp_safe_spin_lock(&x86_topo_lock);
1292 }
2d21ac55
A
1293 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1294 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1295 !cpu_datap(cpu)->cpu_running)
1296 continue;
fe8ab488 1297 tsc_spin_start = rdtsc64();
2d21ac55
A
1298 if (cpu == (cpu_t) cpu_number()) {
1299 /*
1300 * We don't IPI ourself and if calling asynchronously,
1301 * we defer our call until we have signalled all others.
1302 */
1303 call_self = TRUE;
1304 if (mode == SYNC && action_func != NULL) {
6d2010ae
A
1305 KERNEL_DEBUG_CONSTANT(
1306 TRACE_MP_CPUS_CALL_LOCAL,
316670eb 1307 VM_KERNEL_UNSLIDE(action_func),
4bd07ac2 1308 VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
6d2010ae 1309 action_func(arg0, arg1);
2d21ac55
A
1310 }
1311 } else {
1312 /*
6d2010ae 1313 * Here to queue a call to cpu and IPI.
2d21ac55 1314 */
316670eb
A
1315 mp_call_t *callp = NULL;
1316 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
fe8ab488 1317 boolean_t intrs_inner;
316670eb 1318
6d2010ae 1319 queue_call:
316670eb
A
1320 if (callp == NULL)
1321 callp = mp_call_alloc();
fe8ab488 1322 intrs_inner = mp_call_head_lock(cqp);
39037602
A
1323 if (callp == NULL) {
1324 mp_call_head_unlock(cqp, intrs_inner);
1325 KERNEL_DEBUG_CONSTANT(
1326 TRACE_MP_CPUS_CALL_NOBUF,
1327 cpu, 0, 0, 0, 0);
1328 if (!intrs_inner) {
1329 /* Sniffing w/o locking */
1330 if (!queue_empty(&cqp->queue))
1331 mp_cpus_call_action();
1332 handle_pending_TLB_flushes();
2d21ac55 1333 }
39037602
A
1334 if (mp_spin_timeout(tsc_spin_start))
1335 panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
1336 tsc_spin_start, rdtsc64());
1337 goto queue_call;
6d2010ae 1338 }
39037602 1339 callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
316670eb
A
1340 callp->func = action_func;
1341 callp->arg0 = arg0;
1342 callp->arg1 = arg1;
1343 mp_call_enqueue_locked(cqp, callp);
fe8ab488 1344 cpus_call_count++;
6d2010ae
A
1345 cpus_called |= cpu_to_cpumask(cpu);
1346 i386_signal_cpu(cpu, MP_CALL, ASYNC);
fe8ab488 1347 mp_call_head_unlock(cqp, intrs_inner);
6d2010ae 1348 if (mode == SYNC) {
fe8ab488 1349 mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
2d21ac55
A
1350 }
1351 }
1352 }
fe8ab488
A
1353 if (topo_lock) {
1354 simple_unlock(&x86_topo_lock);
1355 ml_set_interrupts_enabled(intrs_enabled);
1356 }
2d21ac55 1357
6d2010ae
A
1358 /* Call locally if mode not SYNC */
 1359 if (mode != SYNC && call_self) {
1360 KERNEL_DEBUG_CONSTANT(
1361 TRACE_MP_CPUS_CALL_LOCAL,
4bd07ac2 1362 VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
6d2010ae
A
1363 if (action_func != NULL) {
1364 ml_set_interrupts_enabled(FALSE);
1365 action_func(arg0, arg1);
2d21ac55
A
1366 ml_set_interrupts_enabled(intrs_enabled);
1367 }
2d21ac55 1368 }
2d21ac55 1369
6d2010ae 1370 /* For ASYNC, now wait for all signaled cpus to complete their calls */
fe8ab488
A
1371 if (mode == ASYNC)
1372 mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
6d2010ae 1373
39037602
A
1374 /* Safe to allow pre-emption now */
1375 mp_enable_preemption();
1376
6d2010ae 1377out:
fe8ab488
A
 1378 if (call_self) {
1379 cpus_called |= cpu_to_cpumask(cpu);
1380 cpus_call_count++;
1381 }
6d2010ae
A
1382
1383 if (cpus_calledp)
1384 *cpus_calledp = cpus_called;
6d2010ae
A
1385
1386 KERNEL_DEBUG_CONSTANT(
1387 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
39037602 1388 cpus_call_count, cpus_called, 0, 0, 0);
2d21ac55 1389
fe8ab488 1390 return (cpu_t) cpus_call_count;
2d21ac55
A
1391}
1392
6d2010ae 1393
2d21ac55 1394static void
39037602 1395mp_broadcast_action(__unused void *null)
2d21ac55
A
1396{
1397 /* call action function */
1398 if (mp_bc_action_func != NULL)
1399 mp_bc_action_func(mp_bc_func_arg);
1400
1401 /* if we're the last one through, wake up the instigator */
b0d623f7
A
1402 if (atomic_decl_and_test(&mp_bc_count, 1))
1403 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
2d21ac55
A
1404}
1405
1406/*
1407 * mp_broadcast() runs a given function on all active cpus.
 1408 * The caller blocks until the function has run on all cpus.
 1409 * The caller will also block if there is another pending broadcast.
1410 */
1411void
1412mp_broadcast(
1413 void (*action_func)(void *),
1414 void *arg)
1415{
1416 if (!smp_initialized) {
1417 if (action_func != NULL)
1418 action_func(arg);
1419 return;
1420 }
1421
1422 /* obtain broadcast lock */
b0d623f7 1423 lck_mtx_lock(&mp_bc_lock);
2d21ac55
A
1424
1425 /* set static function pointers */
1426 mp_bc_action_func = action_func;
1427 mp_bc_func_arg = arg;
1428
b0d623f7 1429 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
2d21ac55
A
1430
1431 /*
1432 * signal other processors, which will call mp_broadcast_action()
1433 */
39037602
A
1434 mp_bc_count = real_ncpus; /* assume max possible active */
1435 mp_bc_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, *mp_broadcast_action, NULL) + 1;
1436 atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus); /* subtract inactive */
2d21ac55
A
1437
1438 /* call executor function on this cpu */
39037602 1439 mp_broadcast_action(NULL);
2d21ac55 1440
39037602 1441 /* block for other cpus to have run action_func */
2d21ac55
A
1442 if (mp_bc_ncpus > 1)
1443 thread_block(THREAD_CONTINUE_NULL);
1444 else
1445 clear_wait(current_thread(), THREAD_AWAKENED);
1446
1447 /* release lock */
b0d623f7 1448 lck_mtx_unlock(&mp_bc_lock);
2d21ac55
A
1449}
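/*
 * Illustrative usage sketch (not part of the original mp.c): using
 * mp_broadcast() from a context that may block. Unlike mp_cpus_call(),
 * the caller sleeps until the action has run on all active cpus. The
 * action refresh_cpu_state() is hypothetical.
 */
#if 0 /* example only, not compiled */
static void
refresh_cpu_state(__unused void *arg)
{
	/* Per-CPU work; runs on every active CPU including the caller's */
}

static void
refresh_all_cpus(void)
{
	mp_broadcast(refresh_cpu_state, NULL);	/* blocks until all have run */
}
#endif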
1450
fe8ab488
A
1451void
1452mp_cpus_kick(cpumask_t cpus)
1453{
1454 cpu_t cpu;
1455 boolean_t intrs_enabled = FALSE;
1456
1457 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1458 mp_safe_spin_lock(&x86_topo_lock);
1459
1460 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1461 if ((cpu == (cpu_t) cpu_number())
1462 || ((cpu_to_cpumask(cpu) & cpus) == 0)
1463 || (!cpu_datap(cpu)->cpu_running))
1464 {
1465 continue;
1466 }
1467
1468 lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
1469 }
1470
1471 simple_unlock(&x86_topo_lock);
1472 ml_set_interrupts_enabled(intrs_enabled);
1473}
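/*
 * Illustrative usage sketch (not part of the original mp.c): mp_cpus_kick()
 * just sends the KICK vector to the selected running CPUs (never to the
 * caller) without queueing any call; useful for nudging CPUs out of a
 * polling or low-power state. The wrapper name is hypothetical.
 */
#if 0 /* example only, not compiled */
static void
example_kick_cpus(void)
{
	/* Kick cpus 2 and 3; offline cpus and the calling cpu are skipped */
	mp_cpus_kick(cpu_to_cpumask(2) | cpu_to_cpumask(3));
}
#endif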
1474
2d21ac55
A
1475void
1476i386_activate_cpu(void)
1477{
1478 cpu_data_t *cdp = current_cpu_datap();
1479
1480 assert(!ml_get_interrupts_enabled());
1481
1482 if (!smp_initialized) {
1483 cdp->cpu_running = TRUE;
1484 return;
1485 }
1486
5ba3f43e 1487 mp_safe_spin_lock(&x86_topo_lock);
2d21ac55 1488 cdp->cpu_running = TRUE;
7e4a7d39 1489 started_cpu();
2d21ac55 1490 simple_unlock(&x86_topo_lock);
7ddcb079 1491 flush_tlb_raw();
2d21ac55
A
1492}
1493
1494void
1495i386_deactivate_cpu(void)
1496{
1497 cpu_data_t *cdp = current_cpu_datap();
1498
1499 assert(!ml_get_interrupts_enabled());
bd504ef0
A
1500
1501 KERNEL_DEBUG_CONSTANT(
1502 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
1503 0, 0, 0, 0, 0);
2d21ac55 1504
5ba3f43e 1505 mp_safe_spin_lock(&x86_topo_lock);
2d21ac55
A
1506 cdp->cpu_running = FALSE;
1507 simple_unlock(&x86_topo_lock);
1508
bd504ef0
A
1509 /*
1510 * Move all of this cpu's timers to the master/boot cpu,
1511 * and poke it in case there's a sooner deadline for it to schedule.
1512 */
c910b4d9 1513 timer_queue_shutdown(&cdp->rtclock_timer.queue);
39236c6e 1514 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
c910b4d9 1515
5ba3f43e
A
1516#if MONOTONIC
1517 mt_cpu_down(cdp);
1518#endif /* MONOTONIC */
1519
2d21ac55 1520 /*
bd504ef0
A
1521 * Open an interrupt window
1522 * and ensure any pending IPI or timer is serviced
2d21ac55 1523 */
bd504ef0
A
1524 mp_disable_preemption();
1525 ml_set_interrupts_enabled(TRUE);
1526
1527 while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
1528 cpu_pause();
1529 /*
1530 * Ensure there's no remaining timer deadline set
1531 * - AICPM may have left one active.
1532 */
1533 setPop(0);
1534
1535 ml_set_interrupts_enabled(FALSE);
1536 mp_enable_preemption();
1537
1538 KERNEL_DEBUG_CONSTANT(
1539 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
1540 0, 0, 0, 0, 0);
2d21ac55
A
1541}
1542
1543int pmsafe_debug = 1;
1544
55e303ae
A
1545#if MACH_KDP
1546volatile boolean_t mp_kdp_trap = FALSE;
39037602 1547volatile boolean_t mp_kdp_is_NMI = FALSE;
593a1d5f 1548volatile unsigned long mp_kdp_ncpus;
91447636
A
1549boolean_t mp_kdp_state;
1550
55e303ae
A
1551
1552void
5ba3f43e 1553mp_kdp_enter(boolean_t proceed_on_failure)
55e303ae 1554{
91447636 1555 unsigned int cpu;
6d2010ae 1556 unsigned int ncpus = 0;
593a1d5f 1557 unsigned int my_cpu;
91447636 1558 uint64_t tsc_timeout;
55e303ae
A
1559
1560 DBG("mp_kdp_enter()\n");
1561
1562 /*
1563 * Here to enter the debugger.
1564 * In case of races, only one cpu is allowed to enter kdp after
1565 * stopping others.
1566 */
91447636 1567 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
060df5ea 1568 my_cpu = cpu_number();
7ddcb079
A
1569
1570 if (my_cpu == (unsigned) debugger_cpu) {
1571 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1572 kdp_reset();
1573 return;
1574 }
1575
5ba3f43e
A
1576 uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1577 int locked = 0;
1578 while (!locked || mp_kdp_trap) {
1579 if (locked) {
1580 simple_unlock(&x86_topo_lock);
1581 }
1582 if (proceed_on_failure) {
1583 if (mach_absolute_time() - start_time > 500000000ll) {
1584 kprintf("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
1585 break;
1586 }
1587 locked = simple_lock_try(&x86_topo_lock);
1588 if (!locked) {
1589 cpu_pause();
1590 }
1591 } else {
1592 mp_safe_spin_lock(&x86_topo_lock);
1593 locked = TRUE;
1594 }
2d21ac55 1595
5ba3f43e
A
1596 if (locked && mp_kdp_trap) {
1597 simple_unlock(&x86_topo_lock);
1598 DBG("mp_kdp_enter() race lost\n");
b0d623f7 1599#if MACH_KDP
5ba3f43e 1600 mp_kdp_wait(TRUE, FALSE);
b0d623f7 1601#endif
5ba3f43e
A
1602 locked = FALSE;
1603 }
55e303ae 1604 }
5ba3f43e
A
1605
1606 if (pmsafe_debug && !kdp_snapshot)
1607 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1608
593a1d5f 1609 debugger_cpu = my_cpu;
060df5ea 1610 ncpus = 1;
5ba3f43e 1611 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
55e303ae 1612 mp_kdp_trap = TRUE;
060df5ea 1613 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
55e303ae 1614
0c530ab8
A
1615 /*
1616 * Deliver a nudge to other cpus, counting how many
1617 */
55e303ae 1618 DBG("mp_kdp_enter() signaling other processors\n");
2d21ac55 1619 if (force_immediate_debugger_NMI == FALSE) {
060df5ea 1620 for (cpu = 0; cpu < real_ncpus; cpu++) {
2d21ac55
A
1621 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1622 continue;
1623 ncpus++;
1624 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1625 }
1626 /*
 1627 * Wait for other processors to synchronize
1628 */
1629 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
0c530ab8 1630
2d21ac55
A
1631 /*
1632 * This timeout is rather arbitrary; we don't want to NMI
1633 * processors that are executing at potentially
1634 * "unsafe-to-interrupt" points such as the trampolines,
1635 * but neither do we want to lose state by waiting too long.
1636 */
39037602 1637 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
0c530ab8 1638
2d21ac55
A
1639 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1640 /*
1641 * A TLB shootdown request may be pending--this would
1642 * result in the requesting processor waiting in
1643 * PMAP_UPDATE_TLBS() until this processor deals with it.
1644 * Process it, so it can now enter mp_kdp_wait()
1645 */
1646 handle_pending_TLB_flushes();
1647 cpu_pause();
1648 }
1649 /* If we've timed out, and some processor(s) are still unresponsive,
1650 * interrupt them with an NMI via the local APIC.
0c530ab8 1651 */
2d21ac55 1652 if (mp_kdp_ncpus != ncpus) {
39037602 1653 DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
2d21ac55
A
1654 for (cpu = 0; cpu < real_ncpus; cpu++) {
1655 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1656 continue;
1657 if (cpu_signal_pending(cpu, MP_KDP))
1658 cpu_NMI_interrupt(cpu);
1659 }
39037602
A
1660 /* Wait again for the same timeout */
1661 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1662 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1663 handle_pending_TLB_flushes();
1664 cpu_pause();
1665 }
1666 if (mp_kdp_ncpus != ncpus) {
1667 panic("mp_kdp_enter() timed-out waiting after NMI");
1668 }
2d21ac55 1669 }
55e303ae 1670 }
2d21ac55 1671 else
0c530ab8
A
1672 for (cpu = 0; cpu < real_ncpus; cpu++) {
1673 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1674 continue;
2d21ac55 1675 cpu_NMI_interrupt(cpu);
0c530ab8 1676 }
0c530ab8 1677
5ba3f43e
A
1678 if (locked) {
1679 simple_unlock(&x86_topo_lock);
1680 }
1681
bd504ef0 1682 DBG("mp_kdp_enter() %d processors done %s\n",
6d2010ae 1683 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
0c530ab8 1684
91447636 1685 postcode(MP_KDP_ENTER);
55e303ae
A
1686}
1687
0c530ab8
A
1688static boolean_t
1689cpu_signal_pending(int cpu, mp_event_t event)
1690{
1691 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1692 boolean_t retval = FALSE;
1693
1694 if (i_bit(event, signals))
1695 retval = TRUE;
1696 return retval;
1697}
b0d623f7
A
1698
1699long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1700 void *arg0, void *arg1)
1701{
1702 if (lcpu > (real_ncpus - 1))
1703 return -1;
1704
1705 if (func == NULL)
1706 return -1;
1707
1708 kdp_xcpu_call_func.func = func;
1709 kdp_xcpu_call_func.ret = -1;
1710 kdp_xcpu_call_func.arg0 = arg0;
1711 kdp_xcpu_call_func.arg1 = arg1;
1712 kdp_xcpu_call_func.cpu = lcpu;
1713 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1714 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1715 cpu_pause();
1716 return kdp_xcpu_call_func.ret;
1717}
1718
1719static void
1720kdp_x86_xcpu_poll(void)
1721{
1722 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1723 kdp_xcpu_call_func.ret =
1724 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1725 kdp_xcpu_call_func.arg1,
1726 cpu_number());
1727 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1728 }
1729}
0c530ab8 1730
55e303ae 1731static void
b0d623f7 1732mp_kdp_wait(boolean_t flush, boolean_t isNMI)
55e303ae 1733{
6601e61a 1734 DBG("mp_kdp_wait()\n");
813fb2f6 1735
bd504ef0 1736 current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
b0d623f7 1737#if CONFIG_MCA
2d21ac55
A
1738 /* If we've trapped due to a machine-check, save MCA registers */
1739 mca_check_save();
b0d623f7 1740#endif
2d21ac55 1741
2d21ac55 1742 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
b0d623f7 1743 while (mp_kdp_trap || (isNMI == TRUE)) {
0c530ab8 1744 /*
2d21ac55
A
1745 * A TLB shootdown request may be pending--this would result
1746 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1747 * until this processor handles it.
0c530ab8
A
1748 * Process it, so it can now enter mp_kdp_wait()
1749 */
2d21ac55
A
1750 if (flush)
1751 handle_pending_TLB_flushes();
b0d623f7
A
1752
1753 kdp_x86_xcpu_poll();
55e303ae
A
1754 cpu_pause();
1755 }
2d21ac55 1756
0c530ab8 1757 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
55e303ae
A
1758 DBG("mp_kdp_wait() done\n");
1759}
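
/*
 * Protocol sketch (illustrative only; names are hypothetical): each slave
 * increments the shared count on entry to mp_kdp_wait() and decrements it
 * on the way out, while the debugger CPU clears the trap flag in
 * mp_kdp_exit() and then waits for the count to drain.  Reduced to its
 * essentials, the handshake looks like this:
 */
static volatile long		example_parked_cpus;
static volatile boolean_t	example_park_requested;

static void
example_park_here(void)			/* slave side */
{
	atomic_incl(&example_parked_cpus, 1);
	while (example_park_requested)
		cpu_pause();
	atomic_decl(&example_parked_cpus, 1);
}

static void
example_release_all(void)		/* debugger side */
{
	example_park_requested = FALSE;
	mfence();			/* make the release globally visible */
	while (example_parked_cpus > 0)
		cpu_pause();
}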
1760
1761void
1762mp_kdp_exit(void)
1763{
1764 DBG("mp_kdp_exit()\n");
593a1d5f 1765 debugger_cpu = -1;
0c530ab8 1766 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
b0d623f7
A
1767
1768 debugger_exit_time = mach_absolute_time();
1769
55e303ae 1770 mp_kdp_trap = FALSE;
39236c6e 1771 mfence();
55e303ae
A
1772
1773	/* Wait for other processors to stop spinning. XXX needs timeout */
1774 DBG("mp_kdp_exit() waiting for processors to resume\n");
0c530ab8
A
1775 while (mp_kdp_ncpus > 0) {
1776 /*
1777		 * A TLB shootdown request may be pending... this would result in the requesting
1778		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1779		 * Process it, so the requesting processor can proceed.
1780 */
1781 handle_pending_TLB_flushes();
1782
55e303ae
A
1783 cpu_pause();
1784 }
2d21ac55 1785
d41d1dae 1786 if (pmsafe_debug && !kdp_snapshot)
2d21ac55
A
1787 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1788
6d2010ae
A
1789 debugger_exit_time = mach_absolute_time();
1790
55e303ae 1791 DBG("mp_kdp_exit() done\n");
91447636 1792 (void) ml_set_interrupts_enabled(mp_kdp_state);
5ba3f43e 1793 postcode(MP_KDP_EXIT);
39037602
A
1794}
1795
55e303ae
A
1796#endif /* MACH_KDP */
1797
b0d623f7 1798boolean_t
490019cf 1799mp_recent_debugger_activity(void) {
060df5ea
A
1800 uint64_t abstime = mach_absolute_time();
1801 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1802 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
b0d623f7
A
1803}
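
/*
 * Hypothetical caller sketch: timeout and watchdog paths can consult
 * mp_recent_debugger_activity() to avoid treating latency caused by a
 * recent debugger entry/exit as a genuine hang.
 */
static boolean_t
example_timeout_is_genuine(void)
{
	return !mp_recent_debugger_activity();
}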
1804
55e303ae
A
1805/*ARGSUSED*/
1806void
1807init_ast_check(
91447636 1808 __unused processor_t processor)
55e303ae
A
1809{
1810}
1811
1812void
1813cause_ast_check(
1814 processor_t processor)
1815{
b0d623f7 1816 int cpu = processor->cpu_id;
55e303ae
A
1817
1818 if (cpu != cpu_number()) {
1819 i386_signal_cpu(cpu, MP_AST, ASYNC);
6d2010ae 1820 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
55e303ae
A
1821 }
1822}
1823
593a1d5f
A
1824void
1825slave_machine_init(void *param)
91447636
A
1826{
1827 /*
0c530ab8 1828 * Here in process context, but with interrupts disabled.
91447636
A
1829 */
1830 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1831
593a1d5f
A
1832 if (param == FULL_SLAVE_INIT) {
1833 /*
1834 * Cold start
1835 */
1836 clock_init();
593a1d5f 1837 }
fe8ab488 1838 cpu_machine_init(); /* Interrupts enabled hereafter */
55e303ae
A
1839}
1840
b0d623f7 1841#undef cpu_number
55e303ae
A
1842int cpu_number(void)
1843{
1844 return get_cpu_number();
1845}
1846
6d2010ae
A
1847static void
1848cpu_prewarm_init(void)
1849{
1850 int i;
1851
1852 simple_lock_init(&cpu_warm_lock, 0);
1853 queue_init(&cpu_warm_call_list);
1854 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1855 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1856 }
1857}
1858
1859static timer_call_t
1860grab_warm_timer_call(void)
1861{
1862 spl_t x;
1863 timer_call_t call = NULL;
1864
1865 x = splsched();
1866 simple_lock(&cpu_warm_lock);
1867 if (!queue_empty(&cpu_warm_call_list)) {
1868 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1869 }
1870 simple_unlock(&cpu_warm_lock);
1871 splx(x);
1872
1873 return call;
1874}
1875
1876static void
1877free_warm_timer_call(timer_call_t call)
1878{
1879 spl_t x;
1880
1881 x = splsched();
1882 simple_lock(&cpu_warm_lock);
1883 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1884 simple_unlock(&cpu_warm_lock);
1885 splx(x);
1886}
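
/*
 * Pool discipline sketch (hypothetical helper): every successful
 * grab_warm_timer_call() must eventually be balanced by a
 * free_warm_timer_call(), either directly on an error/cancel path or
 * from the timer callback once it fires (cpu_warm_timer_call_func()
 * below takes the latter route).  Assuming timer_call_cancel() returns
 * TRUE only when it dequeued a still-pending call:
 */
static void
example_cancel_warm_call(timer_call_t call)
{
	if (timer_call_cancel(call))
		free_warm_timer_call(call);	/* callback will not run; return it ourselves */
}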
1887
1888/*
1889 * Runs in timer call context (interrupts disabled).
1890 */
1891static void
1892cpu_warm_timer_call_func(
1893 call_entry_param_t p0,
1894 __unused call_entry_param_t p1)
1895{
1896 free_warm_timer_call((timer_call_t)p0);
1897 return;
1898}
1899
1900/*
1901 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1902 */
1903static void
1904_cpu_warm_setup(
1905 void *arg)
1906{
1907 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1908
39236c6e 1909 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
6d2010ae
A
1910 cwdp->cwd_result = 0;
1911
1912 return;
1913}
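
/*
 * Pattern sketch (hypothetical names): _cpu_warm_setup() is dispatched
 * via a synchronous cross-call so that the timer is armed on a chosen
 * CPU (see ml_interrupt_prewarm() below).  The same
 * mp_cpus_call(..., SYNC, ...) shape works for any small setup routine
 * that must run on the master processor:
 */
static void
example_remote_setup(void *arg)
{
	(void) arg;		/* runs on the target CPU, interrupts disabled */
}

static boolean_t
example_run_on_master(void *arg)
{
	return mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC,
	    example_remote_setup, arg) != 0;
}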
1914
1915/*
1916 * Not safe to call with interrupts disabled.
1917 */
1918kern_return_t
1919ml_interrupt_prewarm(
1920 uint64_t deadline)
1921{
1922 struct cpu_warm_data cwd;
1923 timer_call_t call;
1924 cpu_t ct;
1925
1926 if (ml_get_interrupts_enabled() == FALSE) {
1927 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1928 }
1929
1930 /*
1931 * If the platform doesn't need our help, say that we succeeded.
1932 */
1933 if (!ml_get_interrupt_prewake_applicable()) {
1934 return KERN_SUCCESS;
1935 }
1936
1937 /*
1938 * Grab a timer call to use.
1939 */
1940 call = grab_warm_timer_call();
1941 if (call == NULL) {
1942 return KERN_RESOURCE_SHORTAGE;
1943 }
1944
1945 timer_call_setup(call, cpu_warm_timer_call_func, call);
1946 cwd.cwd_call = call;
1947 cwd.cwd_deadline = deadline;
1948 cwd.cwd_result = 0;
1949
1950 /*
1951 * For now, non-local interrupts happen on the master processor.
1952 */
1953 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1954 if (ct == 0) {
1955 free_warm_timer_call(call);
1956 return KERN_FAILURE;
1957 } else {
1958 return cwd.cwd_result;
1959 }
1960}
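
/*
 * Hypothetical caller sketch: ask the platform to pre-warm for an
 * interrupt expected roughly one millisecond from now.  The delay is
 * illustrative; nanoseconds_to_absolutetime() converts it into the
 * absolute-time units that ml_interrupt_prewarm() expects.
 */
static kern_return_t
example_prewarm_in_one_ms(void)
{
	uint64_t delta, deadline;

	nanoseconds_to_absolutetime(1 * NSEC_PER_MSEC, &delta);
	deadline = mach_absolute_time() + delta;
	return ml_interrupt_prewarm(deadline);
}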
39037602
A
1961
1962#if DEBUG || DEVELOPMENT
1963void
1964kernel_spin(uint64_t spin_ns)
1965{
1966 boolean_t istate;
1967 uint64_t spin_abs;
1968 uint64_t deadline;
5ba3f43e 1969 cpu_data_t *cdp;
39037602
A
1970
1971 kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
1972 istate = ml_set_interrupts_enabled(FALSE);
5ba3f43e 1973 cdp = current_cpu_datap();
39037602 1974 nanoseconds_to_absolutetime(spin_ns, &spin_abs);
5ba3f43e
A
1975
1976 /* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
1977 cdp->cpu_int_event_time = mach_absolute_time();
1978 cdp->cpu_int_state = (void *) USER_STATE(current_thread());
1979
39037602
A
1980	deadline = mach_absolute_time() + spin_abs;	/* spin_ns converted to absolutetime units above */
1981 while (mach_absolute_time() < deadline)
1982 cpu_pause();
5ba3f43e
A
1983
1984 cdp->cpu_int_event_time = 0;
1985 cdp->cpu_int_state = NULL;
1986
39037602
A
1987 ml_set_interrupts_enabled(istate);
1988 kprintf("kernel_spin() continuing\n");
1989}
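
/*
 * Hypothetical test sketch (DEBUG/DEVELOPMENT only): with the fake
 * interrupt state set up by kernel_spin(), spinning for longer than a
 * scheduler quantum should cause mp_interrupt_watchdog() -- run from
 * another CPU -- to log this CPU, while spinning past LockTimeOut
 * escalates to a panic.  The 100ms figure is illustrative.
 */
static void
example_trip_interrupt_watchdog(void)
{
	kernel_spin(100 * NSEC_PER_MSEC);
}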
5ba3f43e
A
1990
1991/*
1992 * Called from the scheduler's maintenance thread to
1993 * scan running processors for long-running ISRs and:
1994 * - panic if one has run longer than LockTimeOut, or
1995 * - log if one has run longer than a quantum.
1996 */
1997void
1998mp_interrupt_watchdog(void)
1999{
2000 cpu_t cpu;
2001 boolean_t intrs_enabled = FALSE;
2002 uint16_t cpu_int_num;
2003 uint64_t cpu_int_event_time;
2004 uint64_t cpu_rip;
2005 uint64_t cpu_int_duration;
2006 uint64_t now;
2007 x86_saved_state_t *cpu_int_state;
2008
2009 if (__improbable(!mp_interrupt_watchdog_enabled))
2010 return;
2011
2012 intrs_enabled = ml_set_interrupts_enabled(FALSE);
2013 now = mach_absolute_time();
2014 /*
2015 * While timeouts are not suspended,
2016 * check all other processors for long outstanding interrupt handling.
2017 */
2018 for (cpu = 0;
2019 cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
2020 cpu++) {
2021 if ((cpu == (cpu_t) cpu_number()) ||
2022 (!cpu_datap(cpu)->cpu_running))
2023 continue;
2024 cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
2025 if (cpu_int_event_time == 0)
2026 continue;
2027 if (__improbable(now < cpu_int_event_time))
2028 continue; /* skip due to inter-processor skew */
2029 cpu_int_state = cpu_datap(cpu)->cpu_int_state;
2030 if (__improbable(cpu_int_state == NULL))
2031 /* The interrupt may have been dismissed */
2032 continue;
2033
2034 /* Here with a cpu handling an interrupt */
2035
2036 cpu_int_duration = now - cpu_int_event_time;
2037 if (__improbable(cpu_int_duration > LockTimeOut)) {
2038 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2039 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2040 vector_timed_out = cpu_int_num;
2041 NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
2042 panic("Interrupt watchdog, "
2043 "cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
2044 cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
2045 /* NOT REACHED */
2046 } else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
2047 mp_interrupt_watchdog_events++;
2048 cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
2049 cpu_rip = saved_state64(cpu_int_state)->isf.rip;
2050 ml_set_interrupts_enabled(intrs_enabled);
2051 printf("Interrupt watchdog, "
2052 "cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
2053 cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
2054 return;
2055 }
2056 }
2057
2058 ml_set_interrupts_enabled(intrs_enabled);
2059}
39037602 2060#endif