]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/mp.c
xnu-2050.24.15.tar.gz
[apple/xnu.git] / osfmk / i386 / mp.c
1 /*
2 *
3 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 *
5 * This file contains Original Code and/or Modifications of Original Code
6 * as defined in and that are subject to the Apple Public Source License
7 * Version 2.0 (the 'License'). You may not use this file except in
8 * compliance with the License. The rights granted to you under the License
9 * may not be used to create, or enable the creation or redistribution of,
10 * unlawful or unlicensed copies of an Apple operating system, or to
11 * circumvent, violate, or enable the circumvention or violation of, any
12 * terms of an Apple operating system software license agreement.
13 *
14 * Please obtain a copy of the License at
15 * http://www.opensource.apple.com/apsl/ and read it before using this file.
16 *
17 * The Original Code and all software distributed under the License are
18 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 * Please see the License for the specific language governing rights and
23 * limitations under the License.
24 *
25 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 */
27 /*
28 * @OSF_COPYRIGHT@
29 */
30
31 #include <mach_rt.h>
32 #include <mach_kdp.h>
33 #include <mach_ldebug.h>
34 #include <gprof.h>
35
36 #include <mach/mach_types.h>
37 #include <mach/kern_return.h>
38
39 #include <kern/kern_types.h>
40 #include <kern/startup.h>
41 #include <kern/timer_queue.h>
42 #include <kern/processor.h>
43 #include <kern/cpu_number.h>
44 #include <kern/cpu_data.h>
45 #include <kern/assert.h>
46 #include <kern/machine.h>
47 #include <kern/pms.h>
48 #include <kern/misc_protos.h>
49 #include <kern/etimer.h>
50 #include <kern/kalloc.h>
51 #include <kern/queue.h>
52
53 #include <vm/vm_map.h>
54 #include <vm/vm_kern.h>
55
56 #include <profiling/profile-mk.h>
57
58 #include <i386/proc_reg.h>
59 #include <i386/cpu_threads.h>
60 #include <i386/mp_desc.h>
61 #include <i386/misc_protos.h>
62 #include <i386/trap.h>
63 #include <i386/postcode.h>
64 #include <i386/machine_routines.h>
65 #include <i386/mp.h>
66 #include <i386/mp_events.h>
67 #include <i386/lapic.h>
68 #include <i386/cpuid.h>
69 #include <i386/fpu.h>
70 #include <i386/machine_cpu.h>
71 #include <i386/pmCPU.h>
72 #if CONFIG_MCA
73 #include <i386/machine_check.h>
74 #endif
75 #include <i386/acpi.h>
76
77 #include <chud/chud_xnu.h>
78 #include <chud/chud_xnu_private.h>
79
80 #include <sys/kdebug.h>
81
/*
 * Debug helpers. When MP_DEBUG is set, DBG() forwards its arguments to
 * kprintf() and PAUSE expands to delay(1000000); otherwise both expand
 * to nothing.
 */
#if MP_DEBUG
#define PAUSE delay(1000000)
#define DBG(x...) kprintf(x)
#else
#define DBG(x...)
#define PAUSE
#endif /* MP_DEBUG */
89
90 /* Debugging/test trace events: */
91 #define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
92 #define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
93 #define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
94 #define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
95 #define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
96
/* Absolute value of v. The argument is evaluated more than once. */
#define ABS(v)	((v) > 0 ? (v) : -(v))
98
99 void slave_boot_init(void);
100 void i386_cpu_IPI(int cpu);
101
102 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
103 static void mp_rendezvous_action(void);
104 static void mp_broadcast_action(void);
105
106 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
107 static int NMIInterruptHandler(x86_saved_state_t *regs);
108
109 boolean_t smp_initialized = FALSE;
110 uint32_t TSC_sync_margin = 0xFFF;
111 volatile boolean_t force_immediate_debugger_NMI = FALSE;
112 volatile boolean_t pmap_tlb_flush_timeout = FALSE;
113 decl_simple_lock_data(,mp_kdp_lock);
114
115 decl_lck_mtx_data(static, mp_cpu_boot_lock);
116 lck_mtx_ext_t mp_cpu_boot_lock_ext;
117
118 /* Variables needed for MP rendezvous. */
119 decl_simple_lock_data(,mp_rv_lock);
120 static void (*mp_rv_setup_func)(void *arg);
121 static void (*mp_rv_action_func)(void *arg);
122 static void (*mp_rv_teardown_func)(void *arg);
123 static void *mp_rv_func_arg;
124 static volatile int mp_rv_ncpus;
125 /* Cache-aligned barriers: */
126 static volatile long mp_rv_entry __attribute__((aligned(64)));
127 static volatile long mp_rv_exit __attribute__((aligned(64)));
128 static volatile long mp_rv_complete __attribute__((aligned(64)));
129
130 volatile uint64_t debugger_entry_time;
131 volatile uint64_t debugger_exit_time;
132 #if MACH_KDP
133 #include <kdp/kdp.h>
134 extern int kdp_snapshot;
/*
 * Single static record holding a function pointer, two arguments, a
 * return-value slot and a cpu number. The cpu field starts out as
 * KDP_XCPU_NONE.
 * NOTE(review): presumably describes a debugger cross-cpu call request;
 * the code that fills and consumes it is not visible here -- confirm.
 */
static struct _kdp_xcpu_call_func {
	kdp_x86_xcpu_func_t func;
	void *arg0, *arg1;
	volatile long ret;
	volatile uint16_t cpu;
} kdp_xcpu_call_func = {
	.cpu = KDP_XCPU_NONE
};
143
144 #endif
145
146 /* Variables needed for MP broadcast. */
147 static void (*mp_bc_action_func)(void *arg);
148 static void *mp_bc_func_arg;
149 static int mp_bc_ncpus;
150 static volatile long mp_bc_count;
151 decl_lck_mtx_data(static, mp_bc_lock);
152 lck_mtx_ext_t mp_bc_lock_ext;
153 static volatile int debugger_cpu = -1;
154 volatile long NMIPI_acks = 0;
155
156 static void mp_cpus_call_init(void);
157 static void mp_cpus_call_cpu_init(void);
158 static void mp_cpus_call_action(void);
159 static void mp_call_PM(void);
160
161 char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
162
163 /* PAL-related routines */
164 boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
165 int ipi_vector, i386_intr_func_t ipi_handler);
166 void i386_start_cpu(int lapic_id, int cpu_num);
167 void i386_send_NMI(int cpu);
168
#if GPROF
/*
 * Initialize dummy structs for profiling. These aren't used, but they
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
/*
 * Slot 0 is set by the initializer above; GPROF_INIT() points every
 * remaining slot (1 .. MAX_CPUS-1) at the same dummy structure.
 */
#define GPROF_INIT() \
{ \
	int i; \
 \
	/* Hack to initialize pointers to unused profiling structs */ \
	for (i = 1; i < MAX_CPUS; i++) \
		_profile_vars_cpus[i] = &_profile_vars; \
}
#else
/* Without GPROF the initializer is a no-op. */
#define GPROF_INIT()
#endif /* GPROF */
187
188 static lck_grp_t smp_lck_grp;
189 static lck_grp_attr_t smp_lck_grp_attr;
190
191 #define NUM_CPU_WARM_CALLS 20
192 struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
193 queue_head_t cpu_warm_call_list;
194 decl_simple_lock_data(static, cpu_warm_lock);
195
/*
 * Parameter block for a cpu "warm" request: a timer call, a deadline and
 * a result code.
 * NOTE(review): the producers/consumers of these fields are outside the
 * visible part of the file -- confirm units of cwd_deadline there.
 */
typedef struct cpu_warm_data {
	timer_call_t cwd_call;
	uint64_t cwd_deadline;
	int cwd_result;
} *cpu_warm_data_t;
201
202 static void cpu_prewarm_init(void);
203 static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
204 static void _cpu_warm_setup(void *arg);
205 static timer_call_t grab_warm_timer_call(void);
206 static void free_warm_timer_call(timer_call_t call);
207
208 void
209 smp_init(void)
210 {
211 simple_lock_init(&mp_kdp_lock, 0);
212 simple_lock_init(&mp_rv_lock, 0);
213 lck_grp_attr_setdefault(&smp_lck_grp_attr);
214 lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
215 lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
216 lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
217 console_init();
218
219 if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
220 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
221 return;
222
223 cpu_thread_init();
224
225 GPROF_INIT();
226 DBGLOG_CPU_INIT(master_cpu);
227
228 mp_cpus_call_init();
229 mp_cpus_call_cpu_init();
230
231 if (PE_parse_boot_argn("TSC_sync_margin",
232 &TSC_sync_margin, sizeof(TSC_sync_margin))) {
233 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
234 } else if (cpuid_vmm_present()) {
235 kprintf("TSC sync margin disabled\n");
236 TSC_sync_margin = 0;
237 }
238 smp_initialized = TRUE;
239
240 cpu_prewarm_init();
241
242 return;
243 }
244
/*
 * Parameters of the cpu start currently in progress. intel_startCPU()
 * fills in the single static instance; start_cpu() and started_cpu()
 * read it.
 */
typedef struct {
	int target_cpu;		/* slot number of the cpu being started */
	int target_lapic;	/* its local APIC id (from cpu_to_lapic[]) */
	int starter_cpu;	/* cpu number performing the start */
} processor_start_info_t;
static processor_start_info_t start_info __attribute__((aligned(64)));
251
252 /*
253 * Cache-alignment is to avoid cross-cpu false-sharing interference.
254 */
255 static volatile long tsc_entry_barrier __attribute__((aligned(64)));
256 static volatile long tsc_exit_barrier __attribute__((aligned(64)));
257 static volatile uint64_t tsc_target __attribute__((aligned(64)));
258
259 /*
260 * Poll a CPU to see when it has marked itself as running.
261 */
262 static void
263 mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
264 {
265 while (iters-- > 0) {
266 if (cpu_datap(slot_num)->cpu_running)
267 break;
268 delay(usecdelay);
269 }
270 }
271
272 /*
273 * Quickly bring a CPU back online which has been halted.
274 */
275 kern_return_t
276 intel_startCPU_fast(int slot_num)
277 {
278 kern_return_t rc;
279
280 /*
281 * Try to perform a fast restart
282 */
283 rc = pmCPUExitHalt(slot_num);
284 if (rc != KERN_SUCCESS)
285 /*
286 * The CPU was not eligible for a fast restart.
287 */
288 return(rc);
289
290 /*
291 * Wait until the CPU is back online.
292 */
293 mp_disable_preemption();
294
295 /*
296 * We use short pauses (1us) for low latency. 30,000 iterations is
297 * longer than a full restart would require so it should be more
298 * than long enough.
299 */
300
301 mp_wait_for_cpu_up(slot_num, 30000, 1);
302 mp_enable_preemption();
303
304 /*
305 * Check to make sure that the CPU is really running. If not,
306 * go through the slow path.
307 */
308 if (cpu_datap(slot_num)->cpu_running)
309 return(KERN_SUCCESS);
310 else
311 return(KERN_FAILURE);
312 }
313
314 static void
315 started_cpu(void)
316 {
317 /* Here on the started cpu with cpu_running set TRUE */
318
319 if (TSC_sync_margin &&
320 start_info.target_cpu == cpu_number()) {
321 /*
322 * I've just started-up, synchronize again with the starter cpu
323 * and then snap my TSC.
324 */
325 tsc_target = 0;
326 atomic_decl(&tsc_entry_barrier, 1);
327 while (tsc_entry_barrier != 0)
328 ; /* spin for starter and target at barrier */
329 tsc_target = rdtsc64();
330 atomic_decl(&tsc_exit_barrier, 1);
331 }
332 }
333
334 static void
335 start_cpu(void *arg)
336 {
337 int i = 1000;
338 processor_start_info_t *psip = (processor_start_info_t *) arg;
339
340 /* Ignore this if the current processor is not the starter */
341 if (cpu_number() != psip->starter_cpu)
342 return;
343
344 i386_start_cpu(psip->target_lapic, psip->target_cpu);
345
346 #ifdef POSTCODE_DELAY
347 /* Wait much longer if postcodes are displayed for a delay period. */
348 i *= 10000;
349 #endif
350 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
351 if (TSC_sync_margin &&
352 cpu_datap(psip->target_cpu)->cpu_running) {
353 /*
354 * Compare the TSC from the started processor with ours.
355 * Report and log/panic if it diverges by more than
356 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
357 * can be overriden by boot-arg (with 0 meaning no checking).
358 */
359 uint64_t tsc_starter;
360 int64_t tsc_delta;
361 atomic_decl(&tsc_entry_barrier, 1);
362 while (tsc_entry_barrier != 0)
363 ; /* spin for both processors at barrier */
364 tsc_starter = rdtsc64();
365 atomic_decl(&tsc_exit_barrier, 1);
366 while (tsc_exit_barrier != 0)
367 ; /* spin for target to store its TSC */
368 tsc_delta = tsc_target - tsc_starter;
369 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
370 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
371 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
372 #if DEBUG
373 panic(
374 #else
375 printf(
376 #endif
377 "Unsynchronized TSC for cpu %d: "
378 "0x%016llx, delta 0x%llx\n",
379 psip->target_cpu, tsc_target, tsc_delta);
380 }
381 }
382 }
383
384 kern_return_t
385 intel_startCPU(
386 int slot_num)
387 {
388 int lapic = cpu_to_lapic[slot_num];
389 boolean_t istate;
390
391 assert(lapic != -1);
392
393 DBGLOG_CPU_INIT(slot_num);
394
395 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
396 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
397
398 /*
399 * Initialize (or re-initialize) the descriptor tables for this cpu.
400 * Propagate processor mode to slave.
401 */
402 if (cpu_mode_is64bit())
403 cpu_desc_init64(cpu_datap(slot_num));
404 else
405 cpu_desc_init(cpu_datap(slot_num));
406
407 /* Serialize use of the slave boot stack, etc. */
408 lck_mtx_lock(&mp_cpu_boot_lock);
409
410 istate = ml_set_interrupts_enabled(FALSE);
411 if (slot_num == get_cpu_number()) {
412 ml_set_interrupts_enabled(istate);
413 lck_mtx_unlock(&mp_cpu_boot_lock);
414 return KERN_SUCCESS;
415 }
416
417 start_info.starter_cpu = cpu_number();
418 start_info.target_cpu = slot_num;
419 start_info.target_lapic = lapic;
420 tsc_entry_barrier = 2;
421 tsc_exit_barrier = 2;
422
423 /*
424 * Perform the processor startup sequence with all running
425 * processors rendezvous'ed. This is required during periods when
426 * the cache-disable bit is set for MTRR/PAT initialization.
427 */
428 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
429
430 start_info.target_cpu = 0;
431
432 ml_set_interrupts_enabled(istate);
433 lck_mtx_unlock(&mp_cpu_boot_lock);
434
435 if (!cpu_datap(slot_num)->cpu_running) {
436 kprintf("Failed to start CPU %02d\n", slot_num);
437 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
438 delay(1000000);
439 halt_cpu();
440 return KERN_SUCCESS;
441 } else {
442 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
443 return KERN_SUCCESS;
444 }
445 }
446
447 #if MP_DEBUG
448 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
449 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
450
451 MP_EVENT_NAME_DECL();
452
453 #endif /* MP_DEBUG */
454
455 int
456 cpu_signal_handler(x86_saved_state_t *regs)
457 {
458 int my_cpu;
459 volatile int *my_word;
460
461 SCHED_STATS_IPI(current_processor());
462
463 my_cpu = cpu_number();
464 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
465 /* Store the initial set of signals for diagnostics. New
466 * signals could arrive while these are being processed
467 * so it's no more than a hint.
468 */
469
470 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
471
472 do {
473 #if MACH_KDP
474 if (i_bit(MP_KDP, my_word)) {
475 DBGLOG(cpu_handle,my_cpu,MP_KDP);
476 i_bit_clear(MP_KDP, my_word);
477 /* Ensure that the i386_kernel_state at the base of the
478 * current thread's stack (if any) is synchronized with the
479 * context at the moment of the interrupt, to facilitate
480 * access through the debugger.
481 */
482 sync_iss_to_iks(regs);
483 if (pmsafe_debug && !kdp_snapshot)
484 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
485 mp_kdp_wait(TRUE, FALSE);
486 if (pmsafe_debug && !kdp_snapshot)
487 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
488 } else
489 #endif /* MACH_KDP */
490 if (i_bit(MP_TLB_FLUSH, my_word)) {
491 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
492 i_bit_clear(MP_TLB_FLUSH, my_word);
493 pmap_update_interrupt();
494 } else if (i_bit(MP_AST, my_word)) {
495 DBGLOG(cpu_handle,my_cpu,MP_AST);
496 i_bit_clear(MP_AST, my_word);
497 ast_check(cpu_to_processor(my_cpu));
498 } else if (i_bit(MP_RENDEZVOUS, my_word)) {
499 DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
500 i_bit_clear(MP_RENDEZVOUS, my_word);
501 mp_rendezvous_action();
502 } else if (i_bit(MP_BROADCAST, my_word)) {
503 DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
504 i_bit_clear(MP_BROADCAST, my_word);
505 mp_broadcast_action();
506 } else if (i_bit(MP_CHUD, my_word)) {
507 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
508 i_bit_clear(MP_CHUD, my_word);
509 chudxnu_cpu_signal_handler();
510 } else if (i_bit(MP_CALL, my_word)) {
511 DBGLOG(cpu_handle,my_cpu,MP_CALL);
512 i_bit_clear(MP_CALL, my_word);
513 mp_cpus_call_action();
514 } else if (i_bit(MP_CALL_PM, my_word)) {
515 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
516 i_bit_clear(MP_CALL_PM, my_word);
517 mp_call_PM();
518 }
519 } while (*my_word);
520
521 return 0;
522 }
523
/*
 * NMI handler (registered with i386_smp_init() from smp_init()).
 *
 * Counts the NMI in NMIPI_acks, captures the interrupted context and the
 * current frame pointer, and then either produces a panic backtrace (for
 * a spinlock or TLB-flush timeout) or, with MACH_KDP, waits in
 * mp_kdp_wait(). Returns 1 on every path that returns.
 */
static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
	void *stackptr;

	/*
	 * A panic is in progress and panicDebugging is not set: spin here
	 * forever (after optionally entering PM safe mode).
	 */
	if (panic_active() && !panicDebugging) {
		if (pmsafe_debug)
			pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
		for(;;)
			cpu_pause();
	}

	atomic_incl(&NMIPI_acks, 1);
	sync_iss_to_iks_unconditionally(regs);
	/* Save the frame pointer register; it seeds the backtraces below. */
#if defined (__i386__)
	__asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
#elif defined (__x86_64__)
	__asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
#endif

	/* The cpu recorded in debugger_cpu does nothing further. */
	if (cpu_number() == debugger_cpu)
		goto NMExit;

	if (spinlock_timed_out) {
		/* NMI follows a spinlock acquisition timeout. */
		char pstr[192];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
	} else if (pmap_tlb_flush_timeout == TRUE) {
		/* NMI follows a TLB flush timeout. */
		char pstr[128];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
		panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
	}

#if MACH_KDP
	/*
	 * Acknowledge the NMI and wait in mp_kdp_wait(); PM safe mode is
	 * entered before and left after unless a kdp snapshot is active.
	 */
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
	current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
	mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
#endif
NMExit:
	return 1;
}
568
569
570 /*
571 * cpu_interrupt is really just to be used by the scheduler to
572 * get a CPU's attention it may not always issue an IPI. If an
573 * IPI is always needed then use i386_cpu_IPI.
574 */
575 void
576 cpu_interrupt(int cpu)
577 {
578 boolean_t did_IPI = FALSE;
579
580 if (smp_initialized
581 && pmCPUExitIdle(cpu_datap(cpu))) {
582 i386_cpu_IPI(cpu);
583 did_IPI = TRUE;
584 }
585
586 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
587 }
588
589 /*
590 * Send a true NMI via the local APIC to the specified CPU.
591 */
592 void
593 cpu_NMI_interrupt(int cpu)
594 {
595 if (smp_initialized) {
596 i386_send_NMI(cpu);
597 }
598 }
599
600 static void (* volatile mp_PM_func)(void) = NULL;
601
602 static void
603 mp_call_PM(void)
604 {
605 assert(!ml_get_interrupts_enabled());
606
607 if (mp_PM_func != NULL)
608 mp_PM_func();
609 }
610
611 void
612 cpu_PM_interrupt(int cpu)
613 {
614 assert(!ml_get_interrupts_enabled());
615
616 if (mp_PM_func != NULL) {
617 if (cpu == cpu_number())
618 mp_PM_func();
619 else
620 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
621 }
622 }
623
624 void
625 PM_interrupt_register(void (*fn)(void))
626 {
627 mp_PM_func = fn;
628 }
629
630 void
631 i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
632 {
633 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
634 uint64_t tsc_timeout;
635
636
637 if (!cpu_datap(cpu)->cpu_running)
638 return;
639
640 if (event == MP_TLB_FLUSH)
641 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
642
643 DBGLOG(cpu_signal, cpu, event);
644
645 i_bit_set(event, signals);
646 i386_cpu_IPI(cpu);
647 if (mode == SYNC) {
648 again:
649 tsc_timeout = rdtsc64() + (1000*1000*1000);
650 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
651 cpu_pause();
652 }
653 if (i_bit(event, signals)) {
654 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
655 cpu, event);
656 goto again;
657 }
658 }
659 if (event == MP_TLB_FLUSH)
660 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
661 }
662
663 /*
664 * Send event to all running cpus.
665 * Called with the topology locked.
666 */
667 void
668 i386_signal_cpus(mp_event_t event, mp_sync_t mode)
669 {
670 unsigned int cpu;
671 unsigned int my_cpu = cpu_number();
672
673 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
674
675 for (cpu = 0; cpu < real_ncpus; cpu++) {
676 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
677 continue;
678 i386_signal_cpu(cpu, event, mode);
679 }
680 }
681
682 /*
683 * Return the number of running cpus.
684 * Called with the topology locked.
685 */
686 int
687 i386_active_cpus(void)
688 {
689 unsigned int cpu;
690 unsigned int ncpus = 0;
691
692 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
693
694 for (cpu = 0; cpu < real_ncpus; cpu++) {
695 if (cpu_datap(cpu)->cpu_running)
696 ncpus++;
697 }
698 return(ncpus);
699 }
700
701 /*
702 * All-CPU rendezvous:
703 * - CPUs are signalled,
704 * - all execute the setup function (if specified),
705 * - rendezvous (i.e. all cpus reach a barrier),
706 * - all execute the action function (if specified),
707 * - rendezvous again,
708 * - execute the teardown function (if specified), and then
709 * - resume.
710 *
711 * Note that the supplied external functions _must_ be reentrant and aware
712 * that they are running in parallel and in an unknown lock context.
713 */
714
715 static void
716 mp_rendezvous_action(void)
717 {
718 boolean_t intrs_enabled;
719
720 /* setup function */
721 if (mp_rv_setup_func != NULL)
722 mp_rv_setup_func(mp_rv_func_arg);
723
724 intrs_enabled = ml_get_interrupts_enabled();
725
726 /* spin on entry rendezvous */
727 atomic_incl(&mp_rv_entry, 1);
728 while (mp_rv_entry < mp_rv_ncpus) {
729 /* poll for pesky tlb flushes if interrupts disabled */
730 if (!intrs_enabled)
731 handle_pending_TLB_flushes();
732 cpu_pause();
733 }
734
735 /* action function */
736 if (mp_rv_action_func != NULL)
737 mp_rv_action_func(mp_rv_func_arg);
738
739 /* spin on exit rendezvous */
740 atomic_incl(&mp_rv_exit, 1);
741 while (mp_rv_exit < mp_rv_ncpus) {
742 if (!intrs_enabled)
743 handle_pending_TLB_flushes();
744 cpu_pause();
745 }
746
747 /* teardown function */
748 if (mp_rv_teardown_func != NULL)
749 mp_rv_teardown_func(mp_rv_func_arg);
750
751 /* Bump completion count */
752 atomic_incl(&mp_rv_complete, 1);
753 }
754
/*
 * Run setup_func, action_func and teardown_func (each optional, each
 * passed arg) on all running cpus with a barrier before and after the
 * action. Before SMP is initialized the three are simply called in order
 * on the calling cpu. Concurrent rendezvous requests are serialized by
 * mp_rv_lock, which is held for the whole operation.
 */
void
mp_rendezvous(void (*setup_func)(void *),
	      void (*action_func)(void *),
	      void (*teardown_func)(void *),
	      void *arg)
{

	/* Uniprocessor / early-boot case: no signalling, no barriers. */
	if (!smp_initialized) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* set static function pointers */
	mp_rv_setup_func = setup_func;
	mp_rv_action_func = action_func;
	mp_rv_teardown_func = teardown_func;
	mp_rv_func_arg = arg;

	/* reset the barrier and completion counters for this round */
	mp_rv_entry = 0;
	mp_rv_exit = 0;
	mp_rv_complete = 0;

	/*
	 * signal other processors, which will call mp_rendezvous_action()
	 * with interrupts disabled
	 */
	simple_lock(&x86_topo_lock);
	/* the participant count is taken under the topology lock */
	mp_rv_ncpus = i386_active_cpus();
	i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
	simple_unlock(&x86_topo_lock);

	/* call executor function on this cpu */
	mp_rendezvous_action();

	/*
	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
	 */
	while (mp_rv_complete < mp_rv_ncpus) {
		cpu_pause();
	}

	/* Tidy up */
	mp_rv_setup_func = NULL;
	mp_rv_action_func = NULL;
	mp_rv_teardown_func = NULL;
	mp_rv_func_arg = NULL;

	/* release lock */
	simple_unlock(&mp_rv_lock);
}
815
816 void
817 mp_rendezvous_break_lock(void)
818 {
819 simple_lock_init(&mp_rv_lock, 0);
820 }
821
822 static void
823 setup_disable_intrs(__unused void * param_not_used)
824 {
825 /* disable interrupts before the first barrier */
826 boolean_t intr = ml_set_interrupts_enabled(FALSE);
827
828 current_cpu_datap()->cpu_iflag = intr;
829 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
830 }
831
832 static void
833 teardown_restore_intrs(__unused void * param_not_used)
834 {
835 /* restore interrupt flag following MTRR changes */
836 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
837 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
838 }
839
840 /*
841 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
842 * This is exported for use by kexts.
843 */
844 void
845 mp_rendezvous_no_intrs(
846 void (*action_func)(void *),
847 void *arg)
848 {
849 mp_rendezvous(setup_disable_intrs,
850 action_func,
851 teardown_restore_intrs,
852 arg);
853 }
854
855
/*
 * One queued cross-cpu call request. Buffers of this type move between
 * the shared free list and a per-cpu call queue.
 */
typedef struct {
	queue_chain_t link;		/* queue linkage */
	void (*func)(void *,void *);	/* routine to call */
	void *arg0;			/* routine's 1st arg */
	void *arg1;			/* routine's 2nd arg */
	volatile long *countp;		/* completion counter; may be NULL */
} mp_call_t;
863
864
/* A queue of mp_call_t requests together with the spin lock guarding it. */
typedef struct {
	queue_head_t queue;
	decl_simple_lock_data(, lock);
} mp_call_queue_t;
/* Number of call buffers each cpu contributes to the free list. */
#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
/* Shared pool of unused call buffers. */
static mp_call_queue_t mp_cpus_call_freelist;
/* Pending calls, one queue per cpu, indexed by cpu number. */
static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
872
873 static inline boolean_t
874 mp_call_head_lock(mp_call_queue_t *cqp)
875 {
876 boolean_t intrs_enabled;
877
878 intrs_enabled = ml_set_interrupts_enabled(FALSE);
879 simple_lock(&cqp->lock);
880
881 return intrs_enabled;
882 }
883
884 static inline boolean_t
885 mp_call_head_is_locked(mp_call_queue_t *cqp)
886 {
887 return !ml_get_interrupts_enabled() &&
888 hw_lock_held((hw_lock_t)&cqp->lock);
889 }
890
891 static inline void
892 mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
893 {
894 simple_unlock(&cqp->lock);
895 ml_set_interrupts_enabled(intrs_enabled);
896 }
897
898 static inline mp_call_t *
899 mp_call_alloc(void)
900 {
901 mp_call_t *callp = NULL;
902 boolean_t intrs_enabled;
903 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
904
905 intrs_enabled = mp_call_head_lock(cqp);
906 if (!queue_empty(&cqp->queue))
907 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
908 mp_call_head_unlock(cqp, intrs_enabled);
909
910 return callp;
911 }
912
913 static inline void
914 mp_call_free(mp_call_t *callp)
915 {
916 boolean_t intrs_enabled;
917 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
918
919 intrs_enabled = mp_call_head_lock(cqp);
920 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
921 mp_call_head_unlock(cqp, intrs_enabled);
922 }
923
924 static inline mp_call_t *
925 mp_call_dequeue_locked(mp_call_queue_t *cqp)
926 {
927 mp_call_t *callp = NULL;
928
929 assert(mp_call_head_is_locked(cqp));
930 if (!queue_empty(&cqp->queue))
931 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
932 return callp;
933 }
934
935 static inline void
936 mp_call_enqueue_locked(
937 mp_call_queue_t *cqp,
938 mp_call_t *callp)
939 {
940 queue_enter(&cqp->queue, callp, typeof(callp), link);
941 }
942
943 /* Called on the boot processor to initialize global structures */
944 static void
945 mp_cpus_call_init(void)
946 {
947 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
948
949 DBG("mp_cpus_call_init()\n");
950 simple_lock_init(&cqp->lock, 0);
951 queue_init(&cqp->queue);
952 }
953
954 /*
955 * Called by each processor to add call buffers to the free list
956 * and to initialize the per-cpu call queue.
957 * Also called but ignored on slave processors on re-start/wake.
958 */
959 static void
960 mp_cpus_call_cpu_init(void)
961 {
962 int i;
963 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu_number()];
964 mp_call_t *callp;
965
966 if (cqp->queue.next != NULL)
967 return; /* restart/wake case: called already */
968
969 simple_lock_init(&cqp->lock, 0);
970 queue_init(&cqp->queue);
971 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
972 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
973 mp_call_free(callp);
974 }
975
976 DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
977 }
978
979 /*
980 * This is called from cpu_signal_handler() to process an MP_CALL signal.
981 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
982 */
/*
 * Drain this cpu's call queue: for each queued request, copy it, return
 * its buffer to the free list, run the function (if any) with the queue
 * unlocked, and then bump the request's completion counter (if any).
 * Must be entered with interrupts disabled.
 */
static void
mp_cpus_call_action(void)
{
	mp_call_queue_t *cqp;
	boolean_t intrs_enabled;
	mp_call_t *callp;
	mp_call_t call;

	assert(!ml_get_interrupts_enabled());
	cqp = &mp_cpus_call_head[cpu_number()];
	intrs_enabled = mp_call_head_lock(cqp);
	while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
		/* Copy call request to the stack to free buffer */
		call = *callp;
		mp_call_free(callp);
		if (call.func != NULL) {
			/* Drop the queue lock across the callback ... */
			mp_call_head_unlock(cqp, intrs_enabled);
			KERNEL_DEBUG_CONSTANT(
				TRACE_MP_CPUS_CALL_ACTION,
				call.func, call.arg0, call.arg1, call.countp, 0);
			call.func(call.arg0, call.arg1);
			/* ... and retake it before looking at the queue again. */
			(void) mp_call_head_lock(cqp);
		}
		/* Signal completion only after the function has returned. */
		if (call.countp != NULL)
			atomic_incl(call.countp, 1);
	}
	mp_call_head_unlock(cqp, intrs_enabled);
}
1011
1012 /*
1013 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1014 * Possible modes are:
1015 * SYNC: function is called serially on target cpus in logical cpu order
1016 * waiting for each call to be acknowledged before proceeding
1017 * ASYNC: function call is queued to the specified cpus
1018 * waiting for all calls to complete in parallel before returning
1019 * NOSYNC: function calls are queued
1020 * but we return before confirmation of calls completing.
1021 * The action function may be NULL.
1022 * The cpu mask may include the local cpu. Offline cpus are ignored.
1023 * The return value is the number of cpus on which the call was made or queued.
1024 */
1025 cpu_t
1026 mp_cpus_call(
1027 cpumask_t cpus,
1028 mp_sync_t mode,
1029 void (*action_func)(void *),
1030 void *arg)
1031 {
1032 return mp_cpus_call1(
1033 cpus,
1034 mode,
1035 (void (*)(void *,void *))action_func,
1036 arg,
1037 NULL,
1038 NULL,
1039 NULL);
1040 }
1041
1042 static void
1043 mp_cpus_call_wait(boolean_t intrs_enabled,
1044 long mp_cpus_signals,
1045 volatile long *mp_cpus_calls)
1046 {
1047 mp_call_queue_t *cqp;
1048
1049 cqp = &mp_cpus_call_head[cpu_number()];
1050
1051 while (*mp_cpus_calls < mp_cpus_signals) {
1052 if (!intrs_enabled) {
1053 /* Sniffing w/o locking */
1054 if (!queue_empty(&cqp->queue))
1055 mp_cpus_call_action();
1056 handle_pending_TLB_flushes();
1057 }
1058 cpu_pause();
1059 }
1060 }
1061
1062 cpu_t
1063 mp_cpus_call1(
1064 cpumask_t cpus,
1065 mp_sync_t mode,
1066 void (*action_func)(void *, void *),
1067 void *arg0,
1068 void *arg1,
1069 cpumask_t *cpus_calledp,
1070 cpumask_t *cpus_notcalledp)
1071 {
1072 cpu_t cpu;
1073 boolean_t intrs_enabled = FALSE;
1074 boolean_t call_self = FALSE;
1075 cpumask_t cpus_called = 0;
1076 cpumask_t cpus_notcalled = 0;
1077 long mp_cpus_signals = 0;
1078 volatile long mp_cpus_calls = 0;
1079
1080 KERNEL_DEBUG_CONSTANT(
1081 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
1082 cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
1083
1084 if (!smp_initialized) {
1085 if ((cpus & CPUMASK_SELF) == 0)
1086 goto out;
1087 if (action_func != NULL) {
1088 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1089 action_func(arg0, arg1);
1090 ml_set_interrupts_enabled(intrs_enabled);
1091 }
1092 call_self = TRUE;
1093 goto out;
1094 }
1095
1096 /*
1097 * Queue the call for each non-local requested cpu.
1098 * The topo lock is not taken. Instead we sniff the cpu_running state
1099 * and then re-check it after taking the call lock. A cpu being taken
1100 * offline runs the action function after clearing the cpu_running.
1101 */
1102 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1103 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1104 !cpu_datap(cpu)->cpu_running)
1105 continue;
1106 if (cpu == (cpu_t) cpu_number()) {
1107 /*
1108 * We don't IPI ourself and if calling asynchronously,
1109 * we defer our call until we have signalled all others.
1110 */
1111 call_self = TRUE;
1112 cpus_called |= cpu_to_cpumask(cpu);
1113 if (mode == SYNC && action_func != NULL) {
1114 KERNEL_DEBUG_CONSTANT(
1115 TRACE_MP_CPUS_CALL_LOCAL,
1116 VM_KERNEL_UNSLIDE(action_func),
1117 arg0, arg1, 0, 0);
1118 action_func(arg0, arg1);
1119 }
1120 } else {
1121 /*
1122 * Here to queue a call to cpu and IPI.
1123 * Spinning for request buffer unless NOSYNC.
1124 */
1125 mp_call_t *callp = NULL;
1126 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1127
1128 queue_call:
1129 if (callp == NULL)
1130 callp = mp_call_alloc();
1131 intrs_enabled = mp_call_head_lock(cqp);
1132 if (!cpu_datap(cpu)->cpu_running) {
1133 mp_call_head_unlock(cqp, intrs_enabled);
1134 continue;
1135 }
1136 if (mode == NOSYNC) {
1137 if (callp == NULL) {
1138 cpus_notcalled |= cpu_to_cpumask(cpu);
1139 mp_call_head_unlock(cqp, intrs_enabled);
1140 KERNEL_DEBUG_CONSTANT(
1141 TRACE_MP_CPUS_CALL_NOBUF,
1142 cpu, 0, 0, 0, 0);
1143 continue;
1144 }
1145 callp->countp = NULL;
1146 } else {
1147 if (callp == NULL) {
1148 mp_call_head_unlock(cqp, intrs_enabled);
1149 KERNEL_DEBUG_CONSTANT(
1150 TRACE_MP_CPUS_CALL_NOBUF,
1151 cpu, 0, 0, 0, 0);
1152 if (!intrs_enabled) {
1153 /* Sniffing w/o locking */
1154 if (!queue_empty(&cqp->queue))
1155 mp_cpus_call_action();
1156 handle_pending_TLB_flushes();
1157 }
1158 cpu_pause();
1159 goto queue_call;
1160 }
1161 callp->countp = &mp_cpus_calls;
1162 }
1163 callp->func = action_func;
1164 callp->arg0 = arg0;
1165 callp->arg1 = arg1;
1166 mp_call_enqueue_locked(cqp, callp);
1167 mp_cpus_signals++;
1168 cpus_called |= cpu_to_cpumask(cpu);
1169 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1170 mp_call_head_unlock(cqp, intrs_enabled);
1171 if (mode == SYNC) {
1172 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1173 }
1174 }
1175 }
1176
1177 /* Call locally if mode not SYNC */
1178 if (mode != SYNC && call_self ) {
1179 KERNEL_DEBUG_CONSTANT(
1180 TRACE_MP_CPUS_CALL_LOCAL,
1181 VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
1182 if (action_func != NULL) {
1183 ml_set_interrupts_enabled(FALSE);
1184 action_func(arg0, arg1);
1185 ml_set_interrupts_enabled(intrs_enabled);
1186 }
1187 }
1188
1189 /* For ASYNC, now wait for all signaled cpus to complete their calls */
1190 if (mode == ASYNC) {
1191 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1192 }
1193
1194 out:
1195 cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);
1196
1197 if (cpus_calledp)
1198 *cpus_calledp = cpus_called;
1199 if (cpus_notcalledp)
1200 *cpus_notcalledp = cpus_notcalled;
1201
1202 KERNEL_DEBUG_CONSTANT(
1203 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
1204 cpu, cpus_called, cpus_notcalled, 0, 0);
1205
1206 return cpu;
1207 }
1208
1209
1210 static void
1211 mp_broadcast_action(void)
1212 {
1213 /* call action function */
1214 if (mp_bc_action_func != NULL)
1215 mp_bc_action_func(mp_bc_func_arg);
1216
1217 /* if we're the last one through, wake up the instigator */
1218 if (atomic_decl_and_test(&mp_bc_count, 1))
1219 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1220 }
1221
1222 /*
1223 * mp_broadcast() runs a given function on all active cpus.
1224 * The caller blocks until the functions has run on all cpus.
1225 * The caller will also block if there is another pending braodcast.
1226 */
1227 void
1228 mp_broadcast(
1229 void (*action_func)(void *),
1230 void *arg)
1231 {
1232 if (!smp_initialized) {
1233 if (action_func != NULL)
1234 action_func(arg);
1235 return;
1236 }
1237
1238 /* obtain broadcast lock */
1239 lck_mtx_lock(&mp_bc_lock);
1240
1241 /* set static function pointers */
1242 mp_bc_action_func = action_func;
1243 mp_bc_func_arg = arg;
1244
1245 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
1246
1247 /*
1248 * signal other processors, which will call mp_broadcast_action()
1249 */
1250 simple_lock(&x86_topo_lock);
1251 mp_bc_ncpus = i386_active_cpus(); /* total including this cpu */
1252 mp_bc_count = mp_bc_ncpus;
1253 i386_signal_cpus(MP_BROADCAST, ASYNC);
1254
1255 /* call executor function on this cpu */
1256 mp_broadcast_action();
1257 simple_unlock(&x86_topo_lock);
1258
1259 /* block for all cpus to have run action_func */
1260 if (mp_bc_ncpus > 1)
1261 thread_block(THREAD_CONTINUE_NULL);
1262 else
1263 clear_wait(current_thread(), THREAD_AWAKENED);
1264
1265 /* release lock */
1266 lck_mtx_unlock(&mp_bc_lock);
1267 }
1268
1269 void
1270 i386_activate_cpu(void)
1271 {
1272 cpu_data_t *cdp = current_cpu_datap();
1273
1274 assert(!ml_get_interrupts_enabled());
1275
1276 if (!smp_initialized) {
1277 cdp->cpu_running = TRUE;
1278 return;
1279 }
1280
1281 simple_lock(&x86_topo_lock);
1282 cdp->cpu_running = TRUE;
1283 started_cpu();
1284 simple_unlock(&x86_topo_lock);
1285 flush_tlb_raw();
1286 }
1287
1288 extern void etimer_timer_expire(void *arg);
1289
1290 void
1291 i386_deactivate_cpu(void)
1292 {
1293 cpu_data_t *cdp = current_cpu_datap();
1294
1295 assert(!ml_get_interrupts_enabled());
1296
1297 simple_lock(&x86_topo_lock);
1298 cdp->cpu_running = FALSE;
1299 simple_unlock(&x86_topo_lock);
1300
1301 timer_queue_shutdown(&cdp->rtclock_timer.queue);
1302 cdp->rtclock_timer.deadline = EndOfAllTime;
1303 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);
1304
1305 /*
1306 * In case a rendezvous/braodcast/call was initiated to this cpu
1307 * before we cleared cpu_running, we must perform any actions due.
1308 */
1309 if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
1310 mp_rendezvous_action();
1311 if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
1312 mp_broadcast_action();
1313 if (i_bit(MP_CALL, &cdp->cpu_signals))
1314 mp_cpus_call_action();
1315 cdp->cpu_signals = 0; /* all clear */
1316 }
1317
1318 int pmsafe_debug = 1;
1319
1320 #if MACH_KDP
1321 volatile boolean_t mp_kdp_trap = FALSE;
1322 volatile unsigned long mp_kdp_ncpus;
1323 boolean_t mp_kdp_state;
1324
1325
1326 void
1327 mp_kdp_enter(void)
1328 {
1329 unsigned int cpu;
1330 unsigned int ncpus = 0;
1331 unsigned int my_cpu;
1332 uint64_t tsc_timeout;
1333
1334 DBG("mp_kdp_enter()\n");
1335
1336 /*
1337 * Here to enter the debugger.
1338 * In case of races, only one cpu is allowed to enter kdp after
1339 * stopping others.
1340 */
1341 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1342 my_cpu = cpu_number();
1343
1344 if (my_cpu == (unsigned) debugger_cpu) {
1345 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1346 kdp_reset();
1347 return;
1348 }
1349
1350 cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1351 simple_lock(&mp_kdp_lock);
1352
1353 if (pmsafe_debug && !kdp_snapshot)
1354 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1355
1356 while (mp_kdp_trap) {
1357 simple_unlock(&mp_kdp_lock);
1358 DBG("mp_kdp_enter() race lost\n");
1359 #if MACH_KDP
1360 mp_kdp_wait(TRUE, FALSE);
1361 #endif
1362 simple_lock(&mp_kdp_lock);
1363 }
1364 debugger_cpu = my_cpu;
1365 ncpus = 1;
1366 mp_kdp_ncpus = 1; /* self */
1367 mp_kdp_trap = TRUE;
1368 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1369 simple_unlock(&mp_kdp_lock);
1370
1371 /*
1372 * Deliver a nudge to other cpus, counting how many
1373 */
1374 DBG("mp_kdp_enter() signaling other processors\n");
1375 if (force_immediate_debugger_NMI == FALSE) {
1376 for (cpu = 0; cpu < real_ncpus; cpu++) {
1377 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1378 continue;
1379 ncpus++;
1380 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1381 }
1382 /*
1383 * Wait other processors to synchronize
1384 */
1385 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1386
1387 /*
1388 * This timeout is rather arbitrary; we don't want to NMI
1389 * processors that are executing at potentially
1390 * "unsafe-to-interrupt" points such as the trampolines,
1391 * but neither do we want to lose state by waiting too long.
1392 */
1393 tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);
1394
1395 if (virtualized)
1396 tsc_timeout = ~0ULL;
1397
1398 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1399 /*
1400 * A TLB shootdown request may be pending--this would
1401 * result in the requesting processor waiting in
1402 * PMAP_UPDATE_TLBS() until this processor deals with it.
1403 * Process it, so it can now enter mp_kdp_wait()
1404 */
1405 handle_pending_TLB_flushes();
1406 cpu_pause();
1407 }
1408 /* If we've timed out, and some processor(s) are still unresponsive,
1409 * interrupt them with an NMI via the local APIC.
1410 */
1411 if (mp_kdp_ncpus != ncpus) {
1412 for (cpu = 0; cpu < real_ncpus; cpu++) {
1413 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1414 continue;
1415 if (cpu_signal_pending(cpu, MP_KDP))
1416 cpu_NMI_interrupt(cpu);
1417 }
1418 }
1419 }
1420 else
1421 for (cpu = 0; cpu < real_ncpus; cpu++) {
1422 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1423 continue;
1424 cpu_NMI_interrupt(cpu);
1425 }
1426
1427 DBG("mp_kdp_enter() %u processors done %s\n",
1428 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1429
1430 postcode(MP_KDP_ENTER);
1431 }
1432
1433 static boolean_t
1434 cpu_signal_pending(int cpu, mp_event_t event)
1435 {
1436 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1437 boolean_t retval = FALSE;
1438
1439 if (i_bit(event, signals))
1440 retval = TRUE;
1441 return retval;
1442 }
1443
1444 long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1445 void *arg0, void *arg1)
1446 {
1447 if (lcpu > (real_ncpus - 1))
1448 return -1;
1449
1450 if (func == NULL)
1451 return -1;
1452
1453 kdp_xcpu_call_func.func = func;
1454 kdp_xcpu_call_func.ret = -1;
1455 kdp_xcpu_call_func.arg0 = arg0;
1456 kdp_xcpu_call_func.arg1 = arg1;
1457 kdp_xcpu_call_func.cpu = lcpu;
1458 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1459 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1460 cpu_pause();
1461 return kdp_xcpu_call_func.ret;
1462 }
1463
1464 static void
1465 kdp_x86_xcpu_poll(void)
1466 {
1467 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1468 kdp_xcpu_call_func.ret =
1469 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1470 kdp_xcpu_call_func.arg1,
1471 cpu_number());
1472 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1473 }
1474 }
1475
1476 static void
1477 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
1478 {
1479 DBG("mp_kdp_wait()\n");
1480 /* If an I/O port has been specified as a debugging aid, issue a read */
1481 panic_io_port_read();
1482
1483 #if CONFIG_MCA
1484 /* If we've trapped due to a machine-check, save MCA registers */
1485 mca_check_save();
1486 #endif
1487
1488 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1489 while (mp_kdp_trap || (isNMI == TRUE)) {
1490 /*
1491 * A TLB shootdown request may be pending--this would result
1492 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1493 * until this processor handles it.
1494 * Process it, so it can now enter mp_kdp_wait()
1495 */
1496 if (flush)
1497 handle_pending_TLB_flushes();
1498
1499 kdp_x86_xcpu_poll();
1500 cpu_pause();
1501 }
1502
1503 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1504 DBG("mp_kdp_wait() done\n");
1505 }
1506
1507 void
1508 mp_kdp_exit(void)
1509 {
1510 DBG("mp_kdp_exit()\n");
1511 debugger_cpu = -1;
1512 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1513
1514 debugger_exit_time = mach_absolute_time();
1515
1516 mp_kdp_trap = FALSE;
1517 __asm__ volatile("mfence");
1518
1519 /* Wait other processors to stop spinning. XXX needs timeout */
1520 DBG("mp_kdp_exit() waiting for processors to resume\n");
1521 while (mp_kdp_ncpus > 0) {
1522 /*
1523 * a TLB shootdown request may be pending... this would result in the requesting
1524 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1525 * Process it, so it can now enter mp_kdp_wait()
1526 */
1527 handle_pending_TLB_flushes();
1528
1529 cpu_pause();
1530 }
1531
1532 if (pmsafe_debug && !kdp_snapshot)
1533 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1534
1535 debugger_exit_time = mach_absolute_time();
1536
1537 DBG("mp_kdp_exit() done\n");
1538 (void) ml_set_interrupts_enabled(mp_kdp_state);
1539 postcode(0);
1540 }
1541 #endif /* MACH_KDP */
1542
1543 boolean_t
1544 mp_recent_debugger_activity() {
1545 uint64_t abstime = mach_absolute_time();
1546 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1547 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
1548 }
1549
1550 /*ARGSUSED*/
1551 void
1552 init_ast_check(
1553 __unused processor_t processor)
1554 {
1555 }
1556
1557 void
1558 cause_ast_check(
1559 processor_t processor)
1560 {
1561 int cpu = processor->cpu_id;
1562
1563 if (cpu != cpu_number()) {
1564 i386_signal_cpu(cpu, MP_AST, ASYNC);
1565 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1566 }
1567 }
1568
1569 void
1570 slave_machine_init(void *param)
1571 {
1572 /*
1573 * Here in process context, but with interrupts disabled.
1574 */
1575 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1576
1577 if (param == FULL_SLAVE_INIT) {
1578 /*
1579 * Cold start
1580 */
1581 clock_init();
1582 cpu_machine_init(); /* Interrupts enabled hereafter */
1583 mp_cpus_call_cpu_init();
1584 }
1585 }
1586
/*
 * Out-of-line cpu_number(): any macro of that name is dropped first so
 * that a real function can be defined.  It returns get_cpu_number().
 */
#undef cpu_number
int
cpu_number(void)
{
	return get_cpu_number();
}
1592
1593 static void
1594 cpu_prewarm_init()
1595 {
1596 int i;
1597
1598 simple_lock_init(&cpu_warm_lock, 0);
1599 queue_init(&cpu_warm_call_list);
1600 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1601 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1602 }
1603 }
1604
1605 static timer_call_t
1606 grab_warm_timer_call()
1607 {
1608 spl_t x;
1609 timer_call_t call = NULL;
1610
1611 x = splsched();
1612 simple_lock(&cpu_warm_lock);
1613 if (!queue_empty(&cpu_warm_call_list)) {
1614 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1615 }
1616 simple_unlock(&cpu_warm_lock);
1617 splx(x);
1618
1619 return call;
1620 }
1621
1622 static void
1623 free_warm_timer_call(timer_call_t call)
1624 {
1625 spl_t x;
1626
1627 x = splsched();
1628 simple_lock(&cpu_warm_lock);
1629 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1630 simple_unlock(&cpu_warm_lock);
1631 splx(x);
1632 }
1633
1634 /*
1635 * Runs in timer call context (interrupts disabled).
1636 */
1637 static void
1638 cpu_warm_timer_call_func(
1639 call_entry_param_t p0,
1640 __unused call_entry_param_t p1)
1641 {
1642 free_warm_timer_call((timer_call_t)p0);
1643 return;
1644 }
1645
1646 /*
1647 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1648 */
1649 static void
1650 _cpu_warm_setup(
1651 void *arg)
1652 {
1653 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1654
1655 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL);
1656 cwdp->cwd_result = 0;
1657
1658 return;
1659 }
1660
1661 /*
1662 * Not safe to call with interrupts disabled.
1663 */
1664 kern_return_t
1665 ml_interrupt_prewarm(
1666 uint64_t deadline)
1667 {
1668 struct cpu_warm_data cwd;
1669 timer_call_t call;
1670 cpu_t ct;
1671
1672 if (ml_get_interrupts_enabled() == FALSE) {
1673 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1674 }
1675
1676 /*
1677 * If the platform doesn't need our help, say that we succeeded.
1678 */
1679 if (!ml_get_interrupt_prewake_applicable()) {
1680 return KERN_SUCCESS;
1681 }
1682
1683 /*
1684 * Grab a timer call to use.
1685 */
1686 call = grab_warm_timer_call();
1687 if (call == NULL) {
1688 return KERN_RESOURCE_SHORTAGE;
1689 }
1690
1691 timer_call_setup(call, cpu_warm_timer_call_func, call);
1692 cwd.cwd_call = call;
1693 cwd.cwd_deadline = deadline;
1694 cwd.cwd_result = 0;
1695
1696 /*
1697 * For now, non-local interrupts happen on the master processor.
1698 */
1699 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1700 if (ct == 0) {
1701 free_warm_timer_call(call);
1702 return KERN_FAILURE;
1703 } else {
1704 return cwd.cwd_result;
1705 }
1706 }