/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */

#include <mach_rt.h>
#include <mach_kdp.h>
#include <kdp/kdp_internal.h>
#include <mach_ldebug.h>
#include <gprof.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/pms.h>
#include <kern/misc_protos.h>
#include <kern/timer_call.h>
#include <kern/kalloc.h>
#include <kern/queue.h>
#include <prng/random.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <profiling/profile-mk.h>

#include <i386/bit_routines.h>
#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/machine_cpu.h>
#include <i386/pmCPU.h>
#if CONFIG_MCA
#include <i386/machine_check.h>
#endif
#include <i386/acpi.h>

#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>

#include <sys/kdebug.h>

#include <console/serial_protos.h>

#if MP_DEBUG
#define PAUSE		delay(1000000)
#define DBG(x...)	kprintf(x)
#else
#define DBG(x...)
#define PAUSE
#endif	/* MP_DEBUG */

/* Debugging/test trace events: */
#define	TRACE_MP_TLB_FLUSH		MACHDBG_CODE(DBG_MACH_MP, 0)
#define	TRACE_MP_CPUS_CALL		MACHDBG_CODE(DBG_MACH_MP, 1)
#define	TRACE_MP_CPUS_CALL_LOCAL	MACHDBG_CODE(DBG_MACH_MP, 2)
#define	TRACE_MP_CPUS_CALL_ACTION	MACHDBG_CODE(DBG_MACH_MP, 3)
#define	TRACE_MP_CPUS_CALL_NOBUF	MACHDBG_CODE(DBG_MACH_MP, 4)
#define	TRACE_MP_CPU_FAST_START		MACHDBG_CODE(DBG_MACH_MP, 5)
#define	TRACE_MP_CPU_START		MACHDBG_CODE(DBG_MACH_MP, 6)
#define	TRACE_MP_CPU_DEACTIVATE		MACHDBG_CODE(DBG_MACH_MP, 7)

#define ABS(v)		(((v) > 0)?(v):-(v))

void 		slave_boot_init(void);
void		i386_cpu_IPI(int cpu);

#if MACH_KDP
static void	mp_kdp_wait(boolean_t flush, boolean_t isNMI);
#endif	/* MACH_KDP */

static boolean_t	mp_safe_spin_lock(usimple_lock_t lock);
#if MACH_KDP
static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
#endif	/* MACH_KDP */
static int		NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t 		smp_initialized = FALSE;
uint32_t 		TSC_sync_margin = 0xFFF;
volatile boolean_t	force_immediate_debugger_NMI = FALSE;
volatile boolean_t	pmap_tlb_flush_timeout = FALSE;
decl_simple_lock_data(,mp_kdp_lock);

decl_simple_lock_data(,debugger_callback_lock);
struct debugger_callback *debugger_callback = NULL;

decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t	mp_cpu_boot_lock_ext;

/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void	(*mp_rv_setup_func)(void *arg);
static void	(*mp_rv_action_func)(void *arg);
static void	(*mp_rv_teardown_func)(void *arg);
static void	*mp_rv_func_arg;
static volatile int	mp_rv_ncpus;
/* Cache-aligned barriers: */
static volatile long	mp_rv_entry    __attribute__((aligned(64)));
static volatile long	mp_rv_exit     __attribute__((aligned(64)));
static volatile long	mp_rv_complete __attribute__((aligned(64)));

volatile	uint64_t	debugger_entry_time;
volatile	uint64_t	debugger_exit_time;
#if MACH_KDP
#include <kdp/kdp.h>
extern int kdp_snapshot;
static struct _kdp_xcpu_call_func {
	kdp_x86_xcpu_func_t func;
	void *arg0, *arg1;
	volatile long ret;
	volatile uint16_t cpu;
} kdp_xcpu_call_func = {
	.cpu  = KDP_XCPU_NONE
};

#endif

/* Variables needed for MP broadcast. */
static void	(*mp_bc_action_func)(void *arg);
static void	*mp_bc_func_arg;
static int	mp_bc_ncpus;
static volatile long	mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t	mp_bc_lock_ext;
static	volatile int	debugger_cpu = -1;
volatile long	 NMIPI_acks = 0;
volatile long	 NMI_count = 0;

extern void	NMI_cpus(void);

static void	mp_cpus_call_init(void);
static void	mp_cpus_call_action(void);
static void	mp_call_PM(void);

static boolean_t	mp_cpus_call_wait_timeout = FALSE;

char		mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init

/* PAL-related routines */
boolean_t	i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
		int ipi_vector, i386_intr_func_t ipi_handler);
void		i386_start_cpu(int lapic_id, int cpu_num);
void		i386_send_NMI(int cpu);

#if GPROF
/*
 * Initialize dummy structs for profiling. These aren't used but
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT()						\
{								\
	int	i;						\
								\
	/* Hack to initialize pointers to unused profiling structs */	\
	for (i = 1; i < MAX_CPUS; i++)				\
		_profile_vars_cpus[i] = &_profile_vars;		\
}
#else
#define GPROF_INIT()
#endif /* GPROF */

static lck_grp_t 	smp_lck_grp;
static lck_grp_attr_t	smp_lck_grp_attr;

#define NUM_CPU_WARM_CALLS	20
struct timer_call	cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
queue_head_t 		cpu_warm_call_list;
decl_simple_lock_data(static, cpu_warm_lock);

typedef struct cpu_warm_data {
	timer_call_t 	cwd_call;
	uint64_t	cwd_deadline;
	int		cwd_result;
} *cpu_warm_data_t;

static void		cpu_prewarm_init(void);
static void		cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
static void 		_cpu_warm_setup(void *arg);
static timer_call_t 	grab_warm_timer_call(void);
static void		free_warm_timer_call(timer_call_t call);

void
smp_init(void)
{
	simple_lock_init(&mp_kdp_lock, 0);
	simple_lock_init(&mp_rv_lock, 0);
	simple_lock_init(&debugger_callback_lock, 0);
	lck_grp_attr_setdefault(&smp_lck_grp_attr);
	lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
	lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	console_init();

	if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
				LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
		return;

	cpu_thread_init();

	GPROF_INIT();
	DBGLOG_CPU_INIT(master_cpu);

	mp_cpus_call_init();
	mp_cpus_call_cpu_init(master_cpu);

	if (PE_parse_boot_argn("TSC_sync_margin",
				&TSC_sync_margin, sizeof(TSC_sync_margin))) {
		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
	} else if (cpuid_vmm_present()) {
		kprintf("TSC sync margin disabled\n");
		TSC_sync_margin = 0;
	}
	smp_initialized = TRUE;

	cpu_prewarm_init();

	return;
}

typedef struct {
	int			target_cpu;
	int			target_lapic;
	int			starter_cpu;
} processor_start_info_t;
static processor_start_info_t	start_info	__attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long		tsc_entry_barrier __attribute__((aligned(64)));
static volatile long		tsc_exit_barrier  __attribute__((aligned(64)));
static volatile uint64_t	tsc_target	  __attribute__((aligned(64)));

/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
	while (iters-- > 0) {
		if (cpu_datap(slot_num)->cpu_running)
			break;
		delay(usecdelay);
	}
}

292/*
293 * Quickly bring a CPU back online which has been halted.
294 */
295kern_return_t
296intel_startCPU_fast(int slot_num)
297{
7e4a7d39 298 kern_return_t rc;
b0d623f7
A
299
300 /*
301 * Try to perform a fast restart
302 */
303 rc = pmCPUExitHalt(slot_num);
304 if (rc != KERN_SUCCESS)
305 /*
306 * The CPU was not eligible for a fast restart.
307 */
308 return(rc);
309
bd504ef0
A
310 KERNEL_DEBUG_CONSTANT(
311 TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
312 slot_num, 0, 0, 0, 0);
313
b0d623f7
A
314 /*
315 * Wait until the CPU is back online.
316 */
317 mp_disable_preemption();
318
319 /*
320 * We use short pauses (1us) for low latency. 30,000 iterations is
321 * longer than a full restart would require so it should be more
322 * than long enough.
323 */
6d2010ae 324
b0d623f7
A
325 mp_wait_for_cpu_up(slot_num, 30000, 1);
326 mp_enable_preemption();
327
bd504ef0
A
328 KERNEL_DEBUG_CONSTANT(
329 TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
330 slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
331
b0d623f7
A
332 /*
333 * Check to make sure that the CPU is really running. If not,
334 * go through the slow path.
335 */
336 if (cpu_datap(slot_num)->cpu_running)
337 return(KERN_SUCCESS);
7e4a7d39 338 else
b0d623f7
A
339 return(KERN_FAILURE);
340}
341
7e4a7d39
A
342static void
343started_cpu(void)
344{
345 /* Here on the started cpu with cpu_running set TRUE */
c910b4d9 346
7e4a7d39
A
347 if (TSC_sync_margin &&
348 start_info.target_cpu == cpu_number()) {
349 /*
350 * I've just started-up, synchronize again with the starter cpu
351 * and then snap my TSC.
352 */
353 tsc_target = 0;
354 atomic_decl(&tsc_entry_barrier, 1);
355 while (tsc_entry_barrier != 0)
356 ; /* spin for starter and target at barrier */
357 tsc_target = rdtsc64();
358 atomic_decl(&tsc_exit_barrier, 1);
359 }
360}
c910b4d9
A
361
362static void
363start_cpu(void *arg)
364{
365 int i = 1000;
366 processor_start_info_t *psip = (processor_start_info_t *) arg;
367
368 /* Ignore this if the current processor is not the starter */
369 if (cpu_number() != psip->starter_cpu)
370 return;
371
bd504ef0
A
372 DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
373 arg, psip->target_cpu, psip->target_lapic);
374
375 KERNEL_DEBUG_CONSTANT(
376 TRACE_MP_CPU_START | DBG_FUNC_START,
377 psip->target_cpu,
378 psip->target_lapic, 0, 0, 0);
379
6d2010ae 380 i386_start_cpu(psip->target_lapic, psip->target_cpu);
c910b4d9
A
381
382#ifdef POSTCODE_DELAY
383 /* Wait much longer if postcodes are displayed for a delay period. */
384 i *= 10000;
385#endif
bd504ef0
A
386 DBG("start_cpu(%p) about to wait for cpu %d\n",
387 arg, psip->target_cpu);
388
c910b4d9 389 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
bd504ef0
A
390
391 KERNEL_DEBUG_CONSTANT(
392 TRACE_MP_CPU_START | DBG_FUNC_END,
393 psip->target_cpu,
394 cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
395
7e4a7d39
A
396 if (TSC_sync_margin &&
397 cpu_datap(psip->target_cpu)->cpu_running) {
398 /*
399 * Compare the TSC from the started processor with ours.
400 * Report and log/panic if it diverges by more than
401 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
 402 * can be overridden by boot-arg (with 0 meaning no checking).
403 */
404 uint64_t tsc_starter;
405 int64_t tsc_delta;
406 atomic_decl(&tsc_entry_barrier, 1);
407 while (tsc_entry_barrier != 0)
408 ; /* spin for both processors at barrier */
409 tsc_starter = rdtsc64();
410 atomic_decl(&tsc_exit_barrier, 1);
411 while (tsc_exit_barrier != 0)
412 ; /* spin for target to store its TSC */
413 tsc_delta = tsc_target - tsc_starter;
414 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
415 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
416 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
417#if DEBUG
418 panic(
419#else
420 printf(
421#endif
422 "Unsynchronized TSC for cpu %d: "
423 "0x%016llx, delta 0x%llx\n",
424 psip->target_cpu, tsc_target, tsc_delta);
425 }
426 }
c910b4d9
A
427}
428
55e303ae
A
429kern_return_t
430intel_startCPU(
431 int slot_num)
432{
c910b4d9
A
433 int lapic = cpu_to_lapic[slot_num];
434 boolean_t istate;
55e303ae 435
91447636
A
436 assert(lapic != -1);
437
438 DBGLOG_CPU_INIT(slot_num);
55e303ae 439
91447636 440 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
6d2010ae 441 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
55e303ae 442
0c530ab8
A
443 /*
444 * Initialize (or re-initialize) the descriptor tables for this cpu.
445 * Propagate processor mode to slave.
446 */
39236c6e 447 cpu_desc_init64(cpu_datap(slot_num));
91447636 448
c910b4d9 449 /* Serialize use of the slave boot stack, etc. */
b0d623f7 450 lck_mtx_lock(&mp_cpu_boot_lock);
55e303ae 451
c910b4d9 452 istate = ml_set_interrupts_enabled(FALSE);
91447636 453 if (slot_num == get_cpu_number()) {
c910b4d9 454 ml_set_interrupts_enabled(istate);
b0d623f7 455 lck_mtx_unlock(&mp_cpu_boot_lock);
91447636
A
456 return KERN_SUCCESS;
457 }
55e303ae 458
b0d623f7
A
459 start_info.starter_cpu = cpu_number();
460 start_info.target_cpu = slot_num;
c910b4d9 461 start_info.target_lapic = lapic;
7e4a7d39
A
462 tsc_entry_barrier = 2;
463 tsc_exit_barrier = 2;
55e303ae 464
c910b4d9 465 /*
b0d623f7 466 * Perform the processor startup sequence with all running
c910b4d9
A
467 * processors rendezvous'ed. This is required during periods when
468 * the cache-disable bit is set for MTRR/PAT initialization.
469 */
b0d623f7 470 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
55e303ae 471
7e4a7d39
A
472 start_info.target_cpu = 0;
473
c910b4d9 474 ml_set_interrupts_enabled(istate);
b0d623f7 475 lck_mtx_unlock(&mp_cpu_boot_lock);
55e303ae 476
91447636 477 if (!cpu_datap(slot_num)->cpu_running) {
0c530ab8 478 kprintf("Failed to start CPU %02d\n", slot_num);
91447636
A
479 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
480 delay(1000000);
b0d623f7 481 halt_cpu();
55e303ae
A
482 return KERN_SUCCESS;
483 } else {
2d21ac55 484 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
55e303ae
A
485 return KERN_SUCCESS;
486 }
487}
488
55e303ae 489#if MP_DEBUG
91447636
A
490cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
491cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
55e303ae
A
492
493MP_EVENT_NAME_DECL();
494
55e303ae
A
495#endif /* MP_DEBUG */
496
fe8ab488
A
497/*
498 * Note: called with NULL state when polling for TLB flush and cross-calls.
499 */
593a1d5f 500int
0c530ab8 501cpu_signal_handler(x86_saved_state_t *regs)
55e303ae 502{
39236c6e
A
503#if !MACH_KDP
504#pragma unused (regs)
505#endif /* !MACH_KDP */
91447636 506 int my_cpu;
55e303ae 507 volatile int *my_word;
55e303ae 508
6d2010ae 509 SCHED_STATS_IPI(current_processor());
55e303ae
A
510
511 my_cpu = cpu_number();
060df5ea
A
512 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
513 /* Store the initial set of signals for diagnostics. New
514 * signals could arrive while these are being processed
515 * so it's no more than a hint.
516 */
6d2010ae 517
060df5ea 518 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
55e303ae
A
519
520 do {
55e303ae 521#if MACH_KDP
fe8ab488 522 if (i_bit(MP_KDP, my_word)) {
55e303ae
A
523 DBGLOG(cpu_handle,my_cpu,MP_KDP);
524 i_bit_clear(MP_KDP, my_word);
0c530ab8
A
525/* Ensure that the i386_kernel_state at the base of the
526 * current thread's stack (if any) is synchronized with the
527 * context at the moment of the interrupt, to facilitate
528 * access through the debugger.
0c530ab8 529 */
b0d623f7 530 sync_iss_to_iks(regs);
d41d1dae
A
531 if (pmsafe_debug && !kdp_snapshot)
532 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
b0d623f7 533 mp_kdp_wait(TRUE, FALSE);
d41d1dae
A
534 if (pmsafe_debug && !kdp_snapshot)
535 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
55e303ae
A
536 } else
537#endif /* MACH_KDP */
91447636 538 if (i_bit(MP_TLB_FLUSH, my_word)) {
55e303ae
A
539 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
540 i_bit_clear(MP_TLB_FLUSH, my_word);
541 pmap_update_interrupt();
0c530ab8
A
542 } else if (i_bit(MP_CHUD, my_word)) {
543 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
544 i_bit_clear(MP_CHUD, my_word);
545 chudxnu_cpu_signal_handler();
2d21ac55
A
546 } else if (i_bit(MP_CALL, my_word)) {
547 DBGLOG(cpu_handle,my_cpu,MP_CALL);
548 i_bit_clear(MP_CALL, my_word);
549 mp_cpus_call_action();
c910b4d9
A
550 } else if (i_bit(MP_CALL_PM, my_word)) {
551 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
552 i_bit_clear(MP_CALL_PM, my_word);
553 mp_call_PM();
55e303ae 554 }
fe8ab488
A
555 if (regs == NULL) {
556 /* Called to poll only for cross-calls and TLB flush */
557 break;
558 } else if (i_bit(MP_AST, my_word)) {
559 DBGLOG(cpu_handle,my_cpu,MP_AST);
560 i_bit_clear(MP_AST, my_word);
561 ast_check(cpu_to_processor(my_cpu));
562 }
55e303ae
A
563 } while (*my_word);
564
593a1d5f 565 return 0;
55e303ae
A
566}
567
fe8ab488 568extern void kprintf_break_lock(void);
593a1d5f 569static int
2d21ac55 570NMIInterruptHandler(x86_saved_state_t *regs)
0c530ab8 571{
fe8ab488 572 void *stackptr;
060df5ea 573
6d2010ae
A
574 if (panic_active() && !panicDebugging) {
575 if (pmsafe_debug)
576 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
577 for(;;)
578 cpu_pause();
579 }
580
060df5ea 581 atomic_incl(&NMIPI_acks, 1);
39236c6e 582 atomic_incl(&NMI_count, 1);
0c530ab8 583 sync_iss_to_iks_unconditionally(regs);
b0d623f7 584 __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
935ed37a 585
593a1d5f 586 if (cpu_number() == debugger_cpu)
fe8ab488 587 goto NMExit;
593a1d5f 588
060df5ea 589 if (spinlock_timed_out) {
7ddcb079 590 char pstr[192];
060df5ea
A
591 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
592 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
fe8ab488
A
593 } else if (mp_cpus_call_wait_timeout) {
594 char pstr[192];
595 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, this CPU timed-out during cross-call\n", cpu_number());
596 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
060df5ea 597 } else if (pmap_tlb_flush_timeout == TRUE) {
593a1d5f 598 char pstr[128];
7ddcb079 599 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
6d2010ae 600 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
fe8ab488 601 }
b0d623f7
A
602
603#if MACH_KDP
d41d1dae
A
604 if (pmsafe_debug && !kdp_snapshot)
605 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
060df5ea 606 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
15129b1c 607 i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
fe8ab488
A
608 if (pmap_tlb_flush_timeout ||
609 spinlock_timed_out ||
610 mp_cpus_call_wait_timeout ||
611 panic_active()) {
612 mp_kdp_wait(FALSE, TRUE);
39037602
A
613 } else if (!mp_kdp_trap &&
614 !mp_kdp_is_NMI &&
615 virtualized && (debug_boot_arg & DB_NMI)) {
fe8ab488
A
616 /*
617 * Under a VMM with the debug boot-arg set, drop into kdp.
618 * Since an NMI is involved, there's a risk of contending with
619 * a panic. And side-effects of NMIs may result in entry into,
620 * and continuing from, the debugger being unreliable.
621 */
39037602
A
622 if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
623 kprintf_break_lock();
624 kprintf("Debugger entry requested by NMI\n");
625 kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
626 printf("Debugger entry requested by NMI\n");
627 mp_kdp_is_NMI = FALSE;
628 } else {
629 mp_kdp_wait(FALSE, FALSE);
630 }
fe8ab488
A
631 } else {
632 mp_kdp_wait(FALSE, FALSE);
633 }
d41d1dae
A
634 if (pmsafe_debug && !kdp_snapshot)
635 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
b0d623f7 636#endif
593a1d5f 637NMExit:
0c530ab8
A
638 return 1;
639}
640
2d21ac55
A
641
642/*
643 * cpu_interrupt is really just to be used by the scheduler to
644 * get a CPU's attention it may not always issue an IPI. If an
645 * IPI is always needed then use i386_cpu_IPI.
646 */
647void
648cpu_interrupt(int cpu)
649{
6d2010ae
A
650 boolean_t did_IPI = FALSE;
651
2d21ac55
A
652 if (smp_initialized
653 && pmCPUExitIdle(cpu_datap(cpu))) {
654 i386_cpu_IPI(cpu);
6d2010ae 655 did_IPI = TRUE;
2d21ac55 656 }
6d2010ae
A
657
658 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
55e303ae
A
659}
660
0c530ab8
A
661/*
662 * Send a true NMI via the local APIC to the specified CPU.
663 */
935ed37a 664void
0c530ab8
A
665cpu_NMI_interrupt(int cpu)
666{
0c530ab8 667 if (smp_initialized) {
6d2010ae 668 i386_send_NMI(cpu);
0c530ab8 669 }
0c530ab8
A
670}
671
39236c6e
A
672void
673NMI_cpus(void)
674{
675 unsigned int cpu;
676 boolean_t intrs_enabled;
677 uint64_t tsc_timeout;
678
679 intrs_enabled = ml_set_interrupts_enabled(FALSE);
680
681 for (cpu = 0; cpu < real_ncpus; cpu++) {
682 if (!cpu_datap(cpu)->cpu_running)
683 continue;
684 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
685 cpu_NMI_interrupt(cpu);
686 tsc_timeout = !machine_timeout_suspended() ?
687 rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
688 ~0ULL;
689 while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
690 handle_pending_TLB_flushes();
691 cpu_pause();
692 if (rdtsc64() > tsc_timeout)
693 panic("NMI_cpus() timeout cpu %d", cpu);
694 }
695 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
696 }
697
698 ml_set_interrupts_enabled(intrs_enabled);
699}
700
b0d623f7 701static void (* volatile mp_PM_func)(void) = NULL;
c910b4d9
A
702
703static void
704mp_call_PM(void)
705{
706 assert(!ml_get_interrupts_enabled());
707
708 if (mp_PM_func != NULL)
709 mp_PM_func();
710}
711
712void
713cpu_PM_interrupt(int cpu)
714{
715 assert(!ml_get_interrupts_enabled());
716
717 if (mp_PM_func != NULL) {
718 if (cpu == cpu_number())
719 mp_PM_func();
720 else
721 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
722 }
723}
724
725void
726PM_interrupt_register(void (*fn)(void))
727{
728 mp_PM_func = fn;
729}
730
55e303ae
A
731void
732i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
733{
91447636
A
734 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
735 uint64_t tsc_timeout;
6601e61a 736
0c530ab8 737
91447636 738 if (!cpu_datap(cpu)->cpu_running)
55e303ae
A
739 return;
740
0c530ab8 741 if (event == MP_TLB_FLUSH)
6d2010ae 742 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
6601e61a 743
0c530ab8
A
744 DBGLOG(cpu_signal, cpu, event);
745
55e303ae 746 i_bit_set(event, signals);
2d21ac55 747 i386_cpu_IPI(cpu);
55e303ae
A
748 if (mode == SYNC) {
749 again:
39236c6e
A
750 tsc_timeout = !machine_timeout_suspended() ?
751 rdtsc64() + (1000*1000*1000) :
752 ~0ULL;
91447636 753 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
55e303ae
A
754 cpu_pause();
755 }
756 if (i_bit(event, signals)) {
757 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
758 cpu, event);
759 goto again;
760 }
761 }
0c530ab8 762 if (event == MP_TLB_FLUSH)
6d2010ae 763 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
55e303ae
A
764}
765
/*
 * Helper function called when busy-waiting: panic if too long
 * a TSC-based time has elapsed since the start of the spin.
 */
static boolean_t
mp_spin_timeout(uint64_t tsc_start)
{
	uint64_t	tsc_timeout;

	cpu_pause();
	if (machine_timeout_suspended())
		return FALSE;

	/*
	 * The timeout is 4 * the spinlock timeout period
	 * unless we have serial console printing (kprintf) enabled
	 * in which case we allow an even greater margin.
	 */
	tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
					    : LockTimeOutTSC << 4;
	return  (rdtsc64() > tsc_start + tsc_timeout);
}

/*
 * Helper function to take a spinlock while ensuring that incoming IPIs
 * are still serviced if interrupts are masked while we spin.
 * Returns current interrupt state.
 */
static boolean_t
mp_safe_spin_lock(usimple_lock_t lock)
{
	if (ml_get_interrupts_enabled()) {
		simple_lock(lock);
		return TRUE;
	} else {
		uint64_t tsc_spin_start = rdtsc64();
		while (!simple_lock_try(lock)) {
			cpu_signal_handler(NULL);
			if (mp_spin_timeout(tsc_spin_start)) {
				uint32_t lock_cpu;
				uintptr_t lowner = (uintptr_t)
						   lock->interlock.lock_data;
				spinlock_timed_out = lock;
				lock_cpu = spinlock_timeout_NMI(lowner);
				panic("mp_safe_spin_lock() timed out,"
				      " lock: %p, owner thread: 0x%lx,"
				      " current_thread: %p, owner on CPU 0x%x",
				      lock, lowner,
				      current_thread(), lock_cpu);
			}
		}
		return FALSE;
	}
}
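
/*
 * Illustrative sketch (not part of the original source): the busy-wait
 * pattern these helpers support -- spin on a condition, keep servicing
 * cross-calls if interrupts are masked, and panic if the spin exceeds the
 * TSC-based timeout. "example_flag" and "example_spin_until_flag" are
 * hypothetical names used only for this example.
 */
#if 0	/* example only, not compiled */
static volatile int example_flag;

static void
example_spin_until_flag(void)
{
	uint64_t tsc_spin_start = rdtsc64();

	while (!example_flag) {
		if (!ml_get_interrupts_enabled())
			cpu_signal_handler(NULL);	/* poll cross-calls/TLB flushes */
		if (mp_spin_timeout(tsc_spin_start))	/* includes cpu_pause() */
			panic("example_spin_until_flag() timed out");
	}
}
#endif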
820
55e303ae
A
821/*
822 * All-CPU rendezvous:
823 * - CPUs are signalled,
824 * - all execute the setup function (if specified),
825 * - rendezvous (i.e. all cpus reach a barrier),
826 * - all execute the action function (if specified),
827 * - rendezvous again,
828 * - execute the teardown function (if specified), and then
829 * - resume.
830 *
831 * Note that the supplied external functions _must_ be reentrant and aware
832 * that they are running in parallel and in an unknown lock context.
833 */
834
835static void
39037602 836mp_rendezvous_action(__unused void *null)
55e303ae 837{
39236c6e
A
838 boolean_t intrs_enabled;
839 uint64_t tsc_spin_start;
55e303ae
A
840
841 /* setup function */
842 if (mp_rv_setup_func != NULL)
843 mp_rv_setup_func(mp_rv_func_arg);
2d21ac55
A
844
845 intrs_enabled = ml_get_interrupts_enabled();
846
55e303ae 847 /* spin on entry rendezvous */
0c530ab8 848 atomic_incl(&mp_rv_entry, 1);
39236c6e 849 tsc_spin_start = rdtsc64();
490019cf 850
0c530ab8 851 while (mp_rv_entry < mp_rv_ncpus) {
2d21ac55
A
852 /* poll for pesky tlb flushes if interrupts disabled */
853 if (!intrs_enabled)
854 handle_pending_TLB_flushes();
490019cf
A
855 if (mp_spin_timeout(tsc_spin_start)) {
856 panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
857 }
0c530ab8 858 }
6d2010ae 859
55e303ae
A
860 /* action function */
861 if (mp_rv_action_func != NULL)
862 mp_rv_action_func(mp_rv_func_arg);
6d2010ae 863
55e303ae 864 /* spin on exit rendezvous */
0c530ab8 865 atomic_incl(&mp_rv_exit, 1);
39236c6e 866 tsc_spin_start = rdtsc64();
2d21ac55
A
867 while (mp_rv_exit < mp_rv_ncpus) {
868 if (!intrs_enabled)
869 handle_pending_TLB_flushes();
fe8ab488 870 if (mp_spin_timeout(tsc_spin_start))
490019cf 871 panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
2d21ac55 872 }
6d2010ae 873
55e303ae
A
874 /* teardown function */
875 if (mp_rv_teardown_func != NULL)
876 mp_rv_teardown_func(mp_rv_func_arg);
0c530ab8
A
877
878 /* Bump completion count */
879 atomic_incl(&mp_rv_complete, 1);
55e303ae
A
880}
881
882void
883mp_rendezvous(void (*setup_func)(void *),
884 void (*action_func)(void *),
885 void (*teardown_func)(void *),
886 void *arg)
887{
39236c6e 888 uint64_t tsc_spin_start;
55e303ae
A
889
890 if (!smp_initialized) {
891 if (setup_func != NULL)
892 setup_func(arg);
893 if (action_func != NULL)
894 action_func(arg);
895 if (teardown_func != NULL)
896 teardown_func(arg);
897 return;
898 }
899
900 /* obtain rendezvous lock */
fe8ab488 901 (void) mp_safe_spin_lock(&mp_rv_lock);
55e303ae
A
902
903 /* set static function pointers */
904 mp_rv_setup_func = setup_func;
905 mp_rv_action_func = action_func;
906 mp_rv_teardown_func = teardown_func;
907 mp_rv_func_arg = arg;
908
0c530ab8
A
909 mp_rv_entry = 0;
910 mp_rv_exit = 0;
911 mp_rv_complete = 0;
55e303ae
A
912
913 /*
914 * signal other processors, which will call mp_rendezvous_action()
2d21ac55 915 * with interrupts disabled
55e303ae 916 */
39037602 917 mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;
55e303ae
A
918
919 /* call executor function on this cpu */
39037602 920 mp_rendezvous_action(NULL);
55e303ae 921
0c530ab8
A
922 /*
923 * Spin for everyone to complete.
924 * This is necessary to ensure that all processors have proceeded
925 * from the exit barrier before we release the rendezvous structure.
926 */
39236c6e 927 tsc_spin_start = rdtsc64();
0c530ab8 928 while (mp_rv_complete < mp_rv_ncpus) {
fe8ab488 929 if (mp_spin_timeout(tsc_spin_start))
490019cf 930 panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
0c530ab8
A
931 }
932
2d21ac55
A
933 /* Tidy up */
934 mp_rv_setup_func = NULL;
935 mp_rv_action_func = NULL;
936 mp_rv_teardown_func = NULL;
937 mp_rv_func_arg = NULL;
938
55e303ae
A
939 /* release lock */
940 simple_unlock(&mp_rv_lock);
941}
942
0c530ab8
A
943void
944mp_rendezvous_break_lock(void)
945{
946 simple_lock_init(&mp_rv_lock, 0);
947}
948
949static void
950setup_disable_intrs(__unused void * param_not_used)
951{
952 /* disable interrupts before the first barrier */
953 boolean_t intr = ml_set_interrupts_enabled(FALSE);
954
955 current_cpu_datap()->cpu_iflag = intr;
956 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
957}
958
959static void
960teardown_restore_intrs(__unused void * param_not_used)
961{
962 /* restore interrupt flag following MTRR changes */
963 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
964 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
965}
966
967/*
968 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
969 * This is exported for use by kexts.
970 */
971void
972mp_rendezvous_no_intrs(
973 void (*action_func)(void *),
974 void *arg)
975{
976 mp_rendezvous(setup_disable_intrs,
977 action_func,
978 teardown_restore_intrs,
979 arg);
980}
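
/*
 * Illustrative sketch (not part of the original source): how a hypothetical
 * caller -- e.g. a kext that must program an MSR identically on every CPU --
 * might use mp_rendezvous_no_intrs(). "example_msr_state_t",
 * "example_wrmsr_action" and "example_set_msr_everywhere" are made-up names.
 */
#if 0	/* example only, not compiled */
typedef struct {
	uint32_t	msr;
	uint64_t	value;
} example_msr_state_t;

static void
example_wrmsr_action(void *arg)
{
	example_msr_state_t *s = (example_msr_state_t *) arg;

	/* Runs on every CPU with interrupts disabled; must not block. */
	wrmsr64(s->msr, s->value);
}

static void
example_set_msr_everywhere(uint32_t msr, uint64_t value)
{
	example_msr_state_t state = { .msr = msr, .value = value };

	mp_rendezvous_no_intrs(example_wrmsr_action, &state);
}
#endif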
981
6d2010ae
A
982
983typedef struct {
984 queue_chain_t link; /* queue linkage */
985 void (*func)(void *,void *); /* routine to call */
986 void *arg0; /* routine's 1st arg */
987 void *arg1; /* routine's 2nd arg */
fe8ab488 988 cpumask_t *maskp; /* completion response mask */
6d2010ae 989} mp_call_t;
316670eb
A
990
991
992typedef struct {
993 queue_head_t queue;
994 decl_simple_lock_data(, lock);
995} mp_call_queue_t;
6d2010ae 996#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
316670eb
A
997static mp_call_queue_t mp_cpus_call_freelist;
998static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
6d2010ae
A
999
1000static inline boolean_t
316670eb 1001mp_call_head_lock(mp_call_queue_t *cqp)
6d2010ae
A
1002{
1003 boolean_t intrs_enabled;
1004
1005 intrs_enabled = ml_set_interrupts_enabled(FALSE);
316670eb 1006 simple_lock(&cqp->lock);
6d2010ae
A
1007
1008 return intrs_enabled;
1009}
1010
fe8ab488
A
1011void
1012mp_cpus_NMIPI(cpumask_t cpu_mask) {
1013 unsigned int cpu, cpu_bit;
1014 uint64_t deadline;
1015
1016 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1017 if (cpu_mask & cpu_bit)
1018 cpu_NMI_interrupt(cpu);
1019 }
1020 deadline = mach_absolute_time() + (LockTimeOut);
1021 while (mach_absolute_time() < deadline)
1022 cpu_pause();
1023}
1024
1025#if MACH_ASSERT
6d2010ae 1026static inline boolean_t
316670eb 1027mp_call_head_is_locked(mp_call_queue_t *cqp)
6d2010ae
A
1028{
1029 return !ml_get_interrupts_enabled() &&
316670eb 1030 hw_lock_held((hw_lock_t)&cqp->lock);
6d2010ae 1031}
fe8ab488 1032#endif
6d2010ae
A
1033
1034static inline void
316670eb 1035mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
6d2010ae 1036{
316670eb 1037 simple_unlock(&cqp->lock);
6d2010ae
A
1038 ml_set_interrupts_enabled(intrs_enabled);
1039}
1040
1041static inline mp_call_t *
1042mp_call_alloc(void)
1043{
316670eb
A
1044 mp_call_t *callp = NULL;
1045 boolean_t intrs_enabled;
1046 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1047
1048 intrs_enabled = mp_call_head_lock(cqp);
1049 if (!queue_empty(&cqp->queue))
1050 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1051 mp_call_head_unlock(cqp, intrs_enabled);
6d2010ae 1052
6d2010ae
A
1053 return callp;
1054}
1055
1056static inline void
1057mp_call_free(mp_call_t *callp)
0c530ab8 1058{
316670eb
A
1059 boolean_t intrs_enabled;
1060 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1061
1062 intrs_enabled = mp_call_head_lock(cqp);
1063 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
1064 mp_call_head_unlock(cqp, intrs_enabled);
6d2010ae
A
1065}
1066
1067static inline mp_call_t *
316670eb 1068mp_call_dequeue_locked(mp_call_queue_t *cqp)
6d2010ae 1069{
316670eb 1070 mp_call_t *callp = NULL;
0c530ab8 1071
316670eb
A
1072 assert(mp_call_head_is_locked(cqp));
1073 if (!queue_empty(&cqp->queue))
1074 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
6d2010ae
A
1075 return callp;
1076}
1077
316670eb
A
1078static inline void
1079mp_call_enqueue_locked(
1080 mp_call_queue_t *cqp,
1081 mp_call_t *callp)
1082{
1083 queue_enter(&cqp->queue, callp, typeof(callp), link);
1084}
1085
6d2010ae
A
1086/* Called on the boot processor to initialize global structures */
1087static void
1088mp_cpus_call_init(void)
1089{
316670eb
A
1090 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1091
6d2010ae 1092 DBG("mp_cpus_call_init()\n");
316670eb
A
1093 simple_lock_init(&cqp->lock, 0);
1094 queue_init(&cqp->queue);
6d2010ae
A
1095}
1096
1097/*
fe8ab488 1098 * Called at processor registration to add call buffers to the free list
6d2010ae 1099 * and to initialize the per-cpu call queue.
6d2010ae 1100 */
fe8ab488
A
1101void
1102mp_cpus_call_cpu_init(int cpu)
6d2010ae 1103{
6d2010ae 1104 int i;
fe8ab488 1105 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
6d2010ae
A
1106 mp_call_t *callp;
1107
316670eb
A
1108 simple_lock_init(&cqp->lock, 0);
1109 queue_init(&cqp->queue);
6d2010ae
A
1110 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1111 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
6d2010ae 1112 mp_call_free(callp);
0c530ab8 1113 }
6d2010ae 1114
fe8ab488 1115 DBG("mp_cpus_call_init(%d) done\n", cpu);
0c530ab8
A
1116}
1117
2d21ac55
A
1118/*
1119 * This is called from cpu_signal_handler() to process an MP_CALL signal.
6d2010ae 1120 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
2d21ac55
A
1121 */
1122static void
1123mp_cpus_call_action(void)
1124{
316670eb 1125 mp_call_queue_t *cqp;
6d2010ae
A
1126 boolean_t intrs_enabled;
1127 mp_call_t *callp;
1128 mp_call_t call;
1129
1130 assert(!ml_get_interrupts_enabled());
316670eb
A
1131 cqp = &mp_cpus_call_head[cpu_number()];
1132 intrs_enabled = mp_call_head_lock(cqp);
1133 while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
6d2010ae
A
1134 /* Copy call request to the stack to free buffer */
1135 call = *callp;
1136 mp_call_free(callp);
1137 if (call.func != NULL) {
316670eb 1138 mp_call_head_unlock(cqp, intrs_enabled);
6d2010ae
A
1139 KERNEL_DEBUG_CONSTANT(
1140 TRACE_MP_CPUS_CALL_ACTION,
4bd07ac2
A
1141 VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
1142 VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
6d2010ae 1143 call.func(call.arg0, call.arg1);
316670eb 1144 (void) mp_call_head_lock(cqp);
6d2010ae 1145 }
fe8ab488
A
1146 if (call.maskp != NULL)
1147 i_bit_set(cpu_number(), call.maskp);
6d2010ae 1148 }
316670eb 1149 mp_call_head_unlock(cqp, intrs_enabled);
2d21ac55
A
1150}
1151
/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * Possible modes are:
 *  SYNC:   function is called serially on target cpus in logical cpu order
 *	    waiting for each call to be acknowledged before proceeding
 *  ASYNC:  function call is queued to the specified cpus
 *	    waiting for all calls to complete in parallel before returning
 *  NOSYNC: function calls are queued
 *	    but we return before confirmation of calls completing.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * The return value is the number of cpus on which the call was made or queued.
 */
cpu_t
mp_cpus_call(
	cpumask_t	cpus,
	mp_sync_t	mode,
 	void		(*action_func)(void *),
	void		*arg)
{
	return mp_cpus_call1(
			cpus,
			mode,
			(void (*)(void *,void *))action_func,
			arg,
			NULL,
			NULL);
}
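
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller running a function on all other active CPUs, first synchronously
 * and then asynchronously. "example_poke" and "example_cross_call" are
 * made-up names.
 */
#if 0	/* example only, not compiled */
static void
example_poke(void *arg)
{
	/* Invoked on each target cpu at interrupt context. */
	(void) arg;
}

static void
example_cross_call(void)
{
	cpu_t	n;

	/* SYNC: called serially on each target cpu, acknowledged before return */
	n = mp_cpus_call(CPUMASK_OTHERS, SYNC, example_poke, NULL);
	kprintf("example_poke ran on %d cpus\n", n);

	/* ASYNC: queued to all targets, waits for all to complete in parallel */
	(void) mp_cpus_call(CPUMASK_OTHERS, ASYNC, example_poke, NULL);
}
#endif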
1180
1181static void
316670eb 1182mp_cpus_call_wait(boolean_t intrs_enabled,
fe8ab488
A
1183 cpumask_t cpus_called,
1184 cpumask_t *cpus_responded)
6d2010ae 1185{
316670eb 1186 mp_call_queue_t *cqp;
39236c6e 1187 uint64_t tsc_spin_start;
6d2010ae 1188
39037602 1189 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
316670eb 1190 cqp = &mp_cpus_call_head[cpu_number()];
6d2010ae 1191
39236c6e 1192 tsc_spin_start = rdtsc64();
fe8ab488 1193 while (*cpus_responded != cpus_called) {
6d2010ae 1194 if (!intrs_enabled) {
316670eb
A
1195 /* Sniffing w/o locking */
1196 if (!queue_empty(&cqp->queue))
6d2010ae 1197 mp_cpus_call_action();
fe8ab488
A
1198 cpu_signal_handler(NULL);
1199 }
1200 if (mp_spin_timeout(tsc_spin_start)) {
1201 cpumask_t cpus_unresponsive;
1202
1203 mp_cpus_call_wait_timeout = TRUE;
1204 cpus_unresponsive = cpus_called & ~(*cpus_responded);
1205 mp_cpus_NMIPI(cpus_unresponsive);
3e170ce0 1206 panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
fe8ab488 1207 cpus_unresponsive);
6d2010ae 1208 }
6d2010ae
A
1209 }
1210}
1211
1212cpu_t
1213mp_cpus_call1(
1214 cpumask_t cpus,
1215 mp_sync_t mode,
1216 void (*action_func)(void *, void *),
1217 void *arg0,
1218 void *arg1,
39037602 1219 cpumask_t *cpus_calledp)
2d21ac55 1220{
39037602 1221 cpu_t cpu = 0;
6d2010ae 1222 boolean_t intrs_enabled = FALSE;
2d21ac55 1223 boolean_t call_self = FALSE;
6d2010ae 1224 cpumask_t cpus_called = 0;
fe8ab488
A
1225 cpumask_t cpus_responded = 0;
1226 long cpus_call_count = 0;
39236c6e 1227 uint64_t tsc_spin_start;
fe8ab488 1228 boolean_t topo_lock;
6d2010ae
A
1229
1230 KERNEL_DEBUG_CONSTANT(
1231 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
4bd07ac2 1232 cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
2d21ac55
A
1233
1234 if (!smp_initialized) {
1235 if ((cpus & CPUMASK_SELF) == 0)
6d2010ae 1236 goto out;
2d21ac55 1237 if (action_func != NULL) {
6d2010ae
A
1238 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1239 action_func(arg0, arg1);
2d21ac55
A
1240 ml_set_interrupts_enabled(intrs_enabled);
1241 }
6d2010ae
A
1242 call_self = TRUE;
1243 goto out;
2d21ac55 1244 }
2d21ac55 1245
6d2010ae
A
1246 /*
1247 * Queue the call for each non-local requested cpu.
fe8ab488
A
1248 * This is performed under the topo lock to prevent changes to
1249 * cpus online state and to prevent concurrent rendezvouses --
1250 * although an exception is made if we're calling only the master
1251 * processor since that always remains active. Note: this exception
1252 * is expected for longterm timer nosync cross-calls to the master cpu.
6d2010ae 1253 */
fe8ab488
A
1254 mp_disable_preemption();
1255 intrs_enabled = ml_get_interrupts_enabled();
1256 topo_lock = (cpus != cpu_to_cpumask(master_cpu));
1257 if (topo_lock) {
1258 ml_set_interrupts_enabled(FALSE);
1259 (void) mp_safe_spin_lock(&x86_topo_lock);
1260 }
2d21ac55
A
1261 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1262 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1263 !cpu_datap(cpu)->cpu_running)
1264 continue;
fe8ab488 1265 tsc_spin_start = rdtsc64();
2d21ac55
A
1266 if (cpu == (cpu_t) cpu_number()) {
1267 /*
1268 * We don't IPI ourself and if calling asynchronously,
1269 * we defer our call until we have signalled all others.
1270 */
1271 call_self = TRUE;
1272 if (mode == SYNC && action_func != NULL) {
6d2010ae
A
1273 KERNEL_DEBUG_CONSTANT(
1274 TRACE_MP_CPUS_CALL_LOCAL,
316670eb 1275 VM_KERNEL_UNSLIDE(action_func),
4bd07ac2 1276 VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
6d2010ae 1277 action_func(arg0, arg1);
2d21ac55
A
1278 }
1279 } else {
1280 /*
6d2010ae 1281 * Here to queue a call to cpu and IPI.
2d21ac55 1282 */
316670eb
A
1283 mp_call_t *callp = NULL;
1284 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
fe8ab488 1285 boolean_t intrs_inner;
316670eb 1286
6d2010ae 1287 queue_call:
316670eb
A
1288 if (callp == NULL)
1289 callp = mp_call_alloc();
fe8ab488 1290 intrs_inner = mp_call_head_lock(cqp);
39037602
A
1291 if (callp == NULL) {
1292 mp_call_head_unlock(cqp, intrs_inner);
1293 KERNEL_DEBUG_CONSTANT(
1294 TRACE_MP_CPUS_CALL_NOBUF,
1295 cpu, 0, 0, 0, 0);
1296 if (!intrs_inner) {
1297 /* Sniffing w/o locking */
1298 if (!queue_empty(&cqp->queue))
1299 mp_cpus_call_action();
1300 handle_pending_TLB_flushes();
2d21ac55 1301 }
39037602
A
1302 if (mp_spin_timeout(tsc_spin_start))
1303 panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
1304 tsc_spin_start, rdtsc64());
1305 goto queue_call;
6d2010ae 1306 }
39037602 1307 callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
316670eb
A
1308 callp->func = action_func;
1309 callp->arg0 = arg0;
1310 callp->arg1 = arg1;
1311 mp_call_enqueue_locked(cqp, callp);
fe8ab488 1312 cpus_call_count++;
6d2010ae
A
1313 cpus_called |= cpu_to_cpumask(cpu);
1314 i386_signal_cpu(cpu, MP_CALL, ASYNC);
fe8ab488 1315 mp_call_head_unlock(cqp, intrs_inner);
6d2010ae 1316 if (mode == SYNC) {
fe8ab488 1317 mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
2d21ac55
A
1318 }
1319 }
1320 }
fe8ab488
A
1321 if (topo_lock) {
1322 simple_unlock(&x86_topo_lock);
1323 ml_set_interrupts_enabled(intrs_enabled);
1324 }
2d21ac55 1325
6d2010ae
A
1326 /* Call locally if mode not SYNC */
1327 if (mode != SYNC && call_self ) {
1328 KERNEL_DEBUG_CONSTANT(
1329 TRACE_MP_CPUS_CALL_LOCAL,
4bd07ac2 1330 VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
6d2010ae
A
1331 if (action_func != NULL) {
1332 ml_set_interrupts_enabled(FALSE);
1333 action_func(arg0, arg1);
2d21ac55
A
1334 ml_set_interrupts_enabled(intrs_enabled);
1335 }
2d21ac55 1336 }
2d21ac55 1337
6d2010ae 1338 /* For ASYNC, now wait for all signaled cpus to complete their calls */
fe8ab488
A
1339 if (mode == ASYNC)
1340 mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
6d2010ae 1341
39037602
A
1342 /* Safe to allow pre-emption now */
1343 mp_enable_preemption();
1344
6d2010ae 1345out:
fe8ab488
A
1346 if (call_self){
1347 cpus_called |= cpu_to_cpumask(cpu);
1348 cpus_call_count++;
1349 }
6d2010ae
A
1350
1351 if (cpus_calledp)
1352 *cpus_calledp = cpus_called;
6d2010ae
A
1353
1354 KERNEL_DEBUG_CONSTANT(
1355 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
39037602 1356 cpus_call_count, cpus_called, 0, 0, 0);
2d21ac55 1357
fe8ab488 1358 return (cpu_t) cpus_call_count;
2d21ac55
A
1359}
1360
6d2010ae 1361
2d21ac55 1362static void
39037602 1363mp_broadcast_action(__unused void *null)
2d21ac55
A
1364{
1365 /* call action function */
1366 if (mp_bc_action_func != NULL)
1367 mp_bc_action_func(mp_bc_func_arg);
1368
1369 /* if we're the last one through, wake up the instigator */
b0d623f7
A
1370 if (atomic_decl_and_test(&mp_bc_count, 1))
1371 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
2d21ac55
A
1372}
1373
/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
void
mp_broadcast(
         void (*action_func)(void *),
         void *arg)
{
   if (!smp_initialized) {
       if (action_func != NULL)
	           action_func(arg);
       return;
   }

   /* obtain broadcast lock */
   lck_mtx_lock(&mp_bc_lock);

   /* set static function pointers */
   mp_bc_action_func = action_func;
   mp_bc_func_arg = arg;

   assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

   /*
    * signal other processors, which will call mp_broadcast_action()
    */
   mp_bc_count = real_ncpus;				/* assume max possible active */
   mp_bc_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, *mp_broadcast_action, NULL) + 1;
   atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus);	/* subtract inactive */

   /* call executor function on this cpu */
   mp_broadcast_action(NULL);

   /* block for other cpus to have run action_func */
   if (mp_bc_ncpus > 1)
       thread_block(THREAD_CONTINUE_NULL);
   else
       clear_wait(current_thread(), THREAD_AWAKENED);

   /* release lock */
   lck_mtx_unlock(&mp_bc_lock);
}
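
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller that runs a sampling routine once on every active CPU and blocks
 * until all are done. "example_sample" and "example_sample_all_cpus" are
 * made-up names.
 */
#if 0	/* example only, not compiled */
static void
example_sample(void *arg)
{
	volatile long *count = (volatile long *) arg;

	/* Stand-in for real per-cpu work. */
	atomic_incl(count, 1);
}

static void
example_sample_all_cpus(void)
{
	long count = 0;

	mp_broadcast(example_sample, (void *) &count);
	/* On return, example_sample has run on every active cpu. */
}
#endif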
1418
fe8ab488
A
1419void
1420mp_cpus_kick(cpumask_t cpus)
1421{
1422 cpu_t cpu;
1423 boolean_t intrs_enabled = FALSE;
1424
1425 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1426 mp_safe_spin_lock(&x86_topo_lock);
1427
1428 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1429 if ((cpu == (cpu_t) cpu_number())
1430 || ((cpu_to_cpumask(cpu) & cpus) == 0)
1431 || (!cpu_datap(cpu)->cpu_running))
1432 {
1433 continue;
1434 }
1435
1436 lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
1437 }
1438
1439 simple_unlock(&x86_topo_lock);
1440 ml_set_interrupts_enabled(intrs_enabled);
1441}
1442
2d21ac55
A
1443void
1444i386_activate_cpu(void)
1445{
1446 cpu_data_t *cdp = current_cpu_datap();
1447
1448 assert(!ml_get_interrupts_enabled());
1449
1450 if (!smp_initialized) {
1451 cdp->cpu_running = TRUE;
1452 return;
1453 }
1454
1455 simple_lock(&x86_topo_lock);
1456 cdp->cpu_running = TRUE;
7e4a7d39 1457 started_cpu();
2d21ac55 1458 simple_unlock(&x86_topo_lock);
7ddcb079 1459 flush_tlb_raw();
2d21ac55
A
1460}
1461
1462void
1463i386_deactivate_cpu(void)
1464{
1465 cpu_data_t *cdp = current_cpu_datap();
1466
1467 assert(!ml_get_interrupts_enabled());
bd504ef0
A
1468
1469 KERNEL_DEBUG_CONSTANT(
1470 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
1471 0, 0, 0, 0, 0);
2d21ac55
A
1472
1473 simple_lock(&x86_topo_lock);
1474 cdp->cpu_running = FALSE;
1475 simple_unlock(&x86_topo_lock);
1476
bd504ef0
A
1477 /*
1478 * Move all of this cpu's timers to the master/boot cpu,
1479 * and poke it in case there's a sooner deadline for it to schedule.
1480 */
c910b4d9 1481 timer_queue_shutdown(&cdp->rtclock_timer.queue);
39236c6e 1482 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
c910b4d9 1483
2d21ac55 1484 /*
bd504ef0
A
1485 * Open an interrupt window
1486 * and ensure any pending IPI or timer is serviced
2d21ac55 1487 */
bd504ef0
A
1488 mp_disable_preemption();
1489 ml_set_interrupts_enabled(TRUE);
1490
1491 while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
1492 cpu_pause();
1493 /*
1494 * Ensure there's no remaining timer deadline set
1495 * - AICPM may have left one active.
1496 */
1497 setPop(0);
1498
1499 ml_set_interrupts_enabled(FALSE);
1500 mp_enable_preemption();
1501
1502 KERNEL_DEBUG_CONSTANT(
1503 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
1504 0, 0, 0, 0, 0);
2d21ac55
A
1505}
1506
1507int pmsafe_debug = 1;
1508
55e303ae
A
1509#if MACH_KDP
1510volatile boolean_t mp_kdp_trap = FALSE;
39037602 1511volatile boolean_t mp_kdp_is_NMI = FALSE;
593a1d5f 1512volatile unsigned long mp_kdp_ncpus;
91447636
A
1513boolean_t mp_kdp_state;
1514
55e303ae
A
1515
1516void
1517mp_kdp_enter(void)
1518{
91447636 1519 unsigned int cpu;
6d2010ae 1520 unsigned int ncpus = 0;
593a1d5f 1521 unsigned int my_cpu;
91447636 1522 uint64_t tsc_timeout;
55e303ae
A
1523
1524 DBG("mp_kdp_enter()\n");
1525
39236c6e
A
1526#if DEBUG
1527 if (!smp_initialized)
1528 simple_lock_init(&mp_kdp_lock, 0);
1529#endif
1530
55e303ae
A
1531 /*
1532 * Here to enter the debugger.
1533 * In case of races, only one cpu is allowed to enter kdp after
1534 * stopping others.
1535 */
91447636 1536 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
060df5ea 1537 my_cpu = cpu_number();
7ddcb079
A
1538
1539 if (my_cpu == (unsigned) debugger_cpu) {
1540 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1541 kdp_reset();
1542 return;
1543 }
1544
060df5ea 1545 cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
55e303ae 1546 simple_lock(&mp_kdp_lock);
060df5ea 1547
d41d1dae 1548 if (pmsafe_debug && !kdp_snapshot)
2d21ac55
A
1549 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1550
55e303ae
A
1551 while (mp_kdp_trap) {
1552 simple_unlock(&mp_kdp_lock);
1553 DBG("mp_kdp_enter() race lost\n");
b0d623f7
A
1554#if MACH_KDP
1555 mp_kdp_wait(TRUE, FALSE);
1556#endif
55e303ae
A
1557 simple_lock(&mp_kdp_lock);
1558 }
593a1d5f 1559 debugger_cpu = my_cpu;
060df5ea 1560 ncpus = 1;
55e303ae
A
1561 mp_kdp_ncpus = 1; /* self */
1562 mp_kdp_trap = TRUE;
060df5ea 1563 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
55e303ae 1564 simple_unlock(&mp_kdp_lock);
55e303ae 1565
0c530ab8
A
1566 /*
1567 * Deliver a nudge to other cpus, counting how many
1568 */
55e303ae 1569 DBG("mp_kdp_enter() signaling other processors\n");
2d21ac55 1570 if (force_immediate_debugger_NMI == FALSE) {
060df5ea 1571 for (cpu = 0; cpu < real_ncpus; cpu++) {
2d21ac55
A
1572 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1573 continue;
1574 ncpus++;
1575 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1576 }
1577 /*
 1578	 * Wait for other processors to synchronize
1579 */
1580 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
0c530ab8 1581
2d21ac55
A
1582 /*
1583 * This timeout is rather arbitrary; we don't want to NMI
1584 * processors that are executing at potentially
1585 * "unsafe-to-interrupt" points such as the trampolines,
1586 * but neither do we want to lose state by waiting too long.
1587 */
39037602 1588 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
0c530ab8 1589
2d21ac55
A
1590 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1591 /*
1592 * A TLB shootdown request may be pending--this would
1593 * result in the requesting processor waiting in
1594 * PMAP_UPDATE_TLBS() until this processor deals with it.
1595 * Process it, so it can now enter mp_kdp_wait()
1596 */
1597 handle_pending_TLB_flushes();
1598 cpu_pause();
1599 }
1600 /* If we've timed out, and some processor(s) are still unresponsive,
1601 * interrupt them with an NMI via the local APIC.
0c530ab8 1602 */
2d21ac55 1603 if (mp_kdp_ncpus != ncpus) {
39037602 1604 DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
2d21ac55
A
1605 for (cpu = 0; cpu < real_ncpus; cpu++) {
1606 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1607 continue;
1608 if (cpu_signal_pending(cpu, MP_KDP))
1609 cpu_NMI_interrupt(cpu);
1610 }
39037602
A
1611 /* Wait again for the same timeout */
1612 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1613 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1614 handle_pending_TLB_flushes();
1615 cpu_pause();
1616 }
1617 if (mp_kdp_ncpus != ncpus) {
1618 panic("mp_kdp_enter() timed-out waiting after NMI");
1619 }
2d21ac55 1620 }
55e303ae 1621 }
2d21ac55 1622 else
0c530ab8
A
1623 for (cpu = 0; cpu < real_ncpus; cpu++) {
1624 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1625 continue;
2d21ac55 1626 cpu_NMI_interrupt(cpu);
0c530ab8 1627 }
0c530ab8 1628
bd504ef0 1629 DBG("mp_kdp_enter() %d processors done %s\n",
6d2010ae 1630 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
0c530ab8 1631
91447636 1632 postcode(MP_KDP_ENTER);
55e303ae
A
1633}
1634
0c530ab8
A
1635static boolean_t
1636cpu_signal_pending(int cpu, mp_event_t event)
1637{
1638 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1639 boolean_t retval = FALSE;
1640
1641 if (i_bit(event, signals))
1642 retval = TRUE;
1643 return retval;
1644}
b0d623f7
A
1645
1646long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1647 void *arg0, void *arg1)
1648{
1649 if (lcpu > (real_ncpus - 1))
1650 return -1;
1651
1652 if (func == NULL)
1653 return -1;
1654
1655 kdp_xcpu_call_func.func = func;
1656 kdp_xcpu_call_func.ret = -1;
1657 kdp_xcpu_call_func.arg0 = arg0;
1658 kdp_xcpu_call_func.arg1 = arg1;
1659 kdp_xcpu_call_func.cpu = lcpu;
1660 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1661 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1662 cpu_pause();
1663 return kdp_xcpu_call_func.ret;
1664}
1665
1666static void
1667kdp_x86_xcpu_poll(void)
1668{
1669 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1670 kdp_xcpu_call_func.ret =
1671 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1672 kdp_xcpu_call_func.arg1,
1673 cpu_number());
1674 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1675 }
1676}
0c530ab8 1677
55e303ae 1678static void
b0d623f7 1679mp_kdp_wait(boolean_t flush, boolean_t isNMI)
55e303ae 1680{
6601e61a 1681 DBG("mp_kdp_wait()\n");
2d21ac55 1682 /* If an I/O port has been specified as a debugging aid, issue a read */
0c530ab8 1683 panic_io_port_read();
bd504ef0 1684 current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
b0d623f7 1685#if CONFIG_MCA
2d21ac55
A
1686 /* If we've trapped due to a machine-check, save MCA registers */
1687 mca_check_save();
b0d623f7 1688#endif
2d21ac55 1689
2d21ac55 1690 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
b0d623f7 1691 while (mp_kdp_trap || (isNMI == TRUE)) {
0c530ab8 1692 /*
2d21ac55
A
1693 * A TLB shootdown request may be pending--this would result
1694 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1695 * until this processor handles it.
0c530ab8
A
1696 * Process it, so it can now enter mp_kdp_wait()
1697 */
2d21ac55
A
1698 if (flush)
1699 handle_pending_TLB_flushes();
b0d623f7
A
1700
1701 kdp_x86_xcpu_poll();
55e303ae
A
1702 cpu_pause();
1703 }
2d21ac55 1704
0c530ab8 1705 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
55e303ae
A
1706 DBG("mp_kdp_wait() done\n");
1707}
1708
1709void
1710mp_kdp_exit(void)
1711{
1712 DBG("mp_kdp_exit()\n");
593a1d5f 1713 debugger_cpu = -1;
0c530ab8 1714 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
b0d623f7
A
1715
1716 debugger_exit_time = mach_absolute_time();
1717
55e303ae 1718 mp_kdp_trap = FALSE;
39236c6e 1719 mfence();
55e303ae
A
1720
 1721	/* Wait for other processors to stop spinning. XXX needs timeout */
1722 DBG("mp_kdp_exit() waiting for processors to resume\n");
0c530ab8
A
1723 while (mp_kdp_ncpus > 0) {
1724 /*
1725 * a TLB shootdown request may be pending... this would result in the requesting
1726 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1727 * Process it, so it can now enter mp_kdp_wait()
1728 */
1729 handle_pending_TLB_flushes();
1730
55e303ae
A
1731 cpu_pause();
1732 }
2d21ac55 1733
d41d1dae 1734 if (pmsafe_debug && !kdp_snapshot)
2d21ac55
A
1735 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1736
6d2010ae
A
1737 debugger_exit_time = mach_absolute_time();
1738
55e303ae 1739 DBG("mp_kdp_exit() done\n");
91447636
A
1740 (void) ml_set_interrupts_enabled(mp_kdp_state);
1741 postcode(0);
55e303ae 1742}
39037602
A
1743
1744#define TRAP_DEBUGGER __asm__ volatile("int3")
1745
1746kern_return_t
1747DebuggerWithCallback(kern_return_t (*callback) (void*),
1748 void *callback_context,
1749 boolean_t proceed_on_sync_failure)
1750{
1751 simple_lock(&debugger_callback_lock);
1752
1753 struct debugger_callback callback_buf = {
1754 .callback = callback,
1755 .callback_context = callback_context,
1756 .proceed_on_sync_failure = proceed_on_sync_failure,
1757 .error = KERN_FAILURE
1758 };
1759
1760 assert(debugger_callback == NULL);
1761 debugger_callback = &callback_buf;
1762
1763 TRAP_DEBUGGER;
1764
1765 debugger_callback = NULL;
1766
1767 simple_unlock(&debugger_callback_lock);
1768
1769 return callback_buf.error;
1770}
1771
55e303ae
A
1772#endif /* MACH_KDP */
1773
b0d623f7 1774boolean_t
490019cf 1775mp_recent_debugger_activity(void) {
060df5ea
A
1776 uint64_t abstime = mach_absolute_time();
1777 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1778 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
b0d623f7
A
1779}
1780
55e303ae
A
1781/*ARGSUSED*/
1782void
1783init_ast_check(
91447636 1784 __unused processor_t processor)
55e303ae
A
1785{
1786}
1787
1788void
1789cause_ast_check(
1790 processor_t processor)
1791{
b0d623f7 1792 int cpu = processor->cpu_id;
55e303ae
A
1793
1794 if (cpu != cpu_number()) {
1795 i386_signal_cpu(cpu, MP_AST, ASYNC);
6d2010ae 1796 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
55e303ae
A
1797 }
1798}
1799
593a1d5f
A
1800void
1801slave_machine_init(void *param)
91447636
A
1802{
1803 /*
0c530ab8 1804 * Here in process context, but with interrupts disabled.
91447636
A
1805 */
1806 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1807
593a1d5f
A
1808 if (param == FULL_SLAVE_INIT) {
1809 /*
1810 * Cold start
1811 */
1812 clock_init();
593a1d5f 1813 }
fe8ab488 1814 cpu_machine_init(); /* Interrupts enabled hereafter */
55e303ae
A
1815}
1816
b0d623f7 1817#undef cpu_number
55e303ae
A
1818int cpu_number(void)
1819{
1820 return get_cpu_number();
1821}
1822
6d2010ae
A
1823static void
1824cpu_prewarm_init()
1825{
1826 int i;
1827
1828 simple_lock_init(&cpu_warm_lock, 0);
1829 queue_init(&cpu_warm_call_list);
1830 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1831 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1832 }
1833}
1834
1835static timer_call_t
1836grab_warm_timer_call()
1837{
1838 spl_t x;
1839 timer_call_t call = NULL;
1840
1841 x = splsched();
1842 simple_lock(&cpu_warm_lock);
1843 if (!queue_empty(&cpu_warm_call_list)) {
1844 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1845 }
1846 simple_unlock(&cpu_warm_lock);
1847 splx(x);
1848
1849 return call;
1850}
1851
1852static void
1853free_warm_timer_call(timer_call_t call)
1854{
1855 spl_t x;
1856
1857 x = splsched();
1858 simple_lock(&cpu_warm_lock);
1859 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1860 simple_unlock(&cpu_warm_lock);
1861 splx(x);
1862}
1863
1864/*
1865 * Runs in timer call context (interrupts disabled).
1866 */
1867static void
1868cpu_warm_timer_call_func(
1869 call_entry_param_t p0,
1870 __unused call_entry_param_t p1)
1871{
1872 free_warm_timer_call((timer_call_t)p0);
1873 return;
1874}
1875
1876/*
1877 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1878 */
1879static void
1880_cpu_warm_setup(
1881 void *arg)
1882{
1883 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1884
39236c6e 1885 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
6d2010ae
A
1886 cwdp->cwd_result = 0;
1887
1888 return;
1889}
1890
1891/*
1892 * Not safe to call with interrupts disabled.
1893 */
1894kern_return_t
1895ml_interrupt_prewarm(
1896 uint64_t deadline)
1897{
1898 struct cpu_warm_data cwd;
1899 timer_call_t call;
1900 cpu_t ct;
1901
1902 if (ml_get_interrupts_enabled() == FALSE) {
1903 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1904 }
1905
1906 /*
1907 * If the platform doesn't need our help, say that we succeeded.
1908 */
1909 if (!ml_get_interrupt_prewake_applicable()) {
1910 return KERN_SUCCESS;
1911 }
1912
1913 /*
1914 * Grab a timer call to use.
1915 */
1916 call = grab_warm_timer_call();
1917 if (call == NULL) {
1918 return KERN_RESOURCE_SHORTAGE;
1919 }
1920
1921 timer_call_setup(call, cpu_warm_timer_call_func, call);
1922 cwd.cwd_call = call;
1923 cwd.cwd_deadline = deadline;
1924 cwd.cwd_result = 0;
1925
1926 /*
1927 * For now, non-local interrupts happen on the master processor.
1928 */
1929 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1930 if (ct == 0) {
1931 free_warm_timer_call(call);
1932 return KERN_FAILURE;
1933 } else {
1934 return cwd.cwd_result;
1935 }
1936}
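
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller arming an interrupt-prewarm timer 5 ms out. Must run with
 * interrupts enabled. "example_prewarm_in_5ms" is a made-up name.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_prewarm_in_5ms(void)
{
	uint64_t deadline;

	clock_interval_to_deadline(5, NSEC_PER_MSEC, &deadline);
	return ml_interrupt_prewarm(deadline);
}
#endif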
39037602
A
1937
1938#if DEBUG || DEVELOPMENT
1939void
1940kernel_spin(uint64_t spin_ns)
1941{
1942 boolean_t istate;
1943 uint64_t spin_abs;
1944 uint64_t deadline;
1945
1946 kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
1947 istate = ml_set_interrupts_enabled(FALSE);
1948 nanoseconds_to_absolutetime(spin_ns, &spin_abs);
1949 deadline = mach_absolute_time() + spin_ns;
1950 while (mach_absolute_time() < deadline)
1951 cpu_pause();
1952 ml_set_interrupts_enabled(istate);
1953 kprintf("kernel_spin() continuing\n");
1954}
1955#endif