1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31
32 #include <mach_rt.h>
33 #include <mach_kdp.h>
34 #include <mach_ldebug.h>
35 #include <gprof.h>
36
37 #include <mach/mach_types.h>
38 #include <mach/kern_return.h>
39
40 #include <kern/kern_types.h>
41 #include <kern/startup.h>
42 #include <kern/timer_queue.h>
43 #include <kern/processor.h>
44 #include <kern/cpu_number.h>
45 #include <kern/cpu_data.h>
46 #include <kern/assert.h>
47 #include <kern/machine.h>
48 #include <kern/pms.h>
49 #include <kern/misc_protos.h>
50 #include <kern/timer_call.h>
51 #include <kern/kalloc.h>
52 #include <kern/queue.h>
53
54 #include <vm/vm_map.h>
55 #include <vm/vm_kern.h>
56
57 #include <profiling/profile-mk.h>
58
59 #include <i386/proc_reg.h>
60 #include <i386/cpu_threads.h>
61 #include <i386/mp_desc.h>
62 #include <i386/misc_protos.h>
63 #include <i386/trap.h>
64 #include <i386/postcode.h>
65 #include <i386/machine_routines.h>
66 #include <i386/mp.h>
67 #include <i386/mp_events.h>
68 #include <i386/lapic.h>
69 #include <i386/cpuid.h>
70 #include <i386/fpu.h>
71 #include <i386/machine_cpu.h>
72 #include <i386/pmCPU.h>
73 #if CONFIG_MCA
74 #include <i386/machine_check.h>
75 #endif
76 #include <i386/acpi.h>
77
78 #include <chud/chud_xnu.h>
79 #include <chud/chud_xnu_private.h>
80
81 #include <sys/kdebug.h>
82
83 #include <console/serial_protos.h>
84
85 #if MP_DEBUG
86 #define PAUSE delay(1000000)
87 #define DBG(x...) kprintf(x)
88 #else
89 #define DBG(x...)
90 #define PAUSE
91 #endif /* MP_DEBUG */
92
93 /* Debugging/test trace events: */
94 #define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
95 #define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
96 #define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
97 #define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
98 #define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
99 #define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
100 #define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
101 #define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
102
103 #define ABS(v) (((v) > 0)?(v):-(v))
104
105 void slave_boot_init(void);
106 void i386_cpu_IPI(int cpu);
107
108 #if MACH_KDP
109 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
110 #endif /* MACH_KDP */
111 static void mp_rendezvous_action(void);
112 static void mp_broadcast_action(void);
113
114 #if MACH_KDP
115 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
116 #endif /* MACH_KDP */
117 static int NMIInterruptHandler(x86_saved_state_t *regs);
118
119 boolean_t smp_initialized = FALSE;
120 uint32_t TSC_sync_margin = 0xFFF;
121 volatile boolean_t force_immediate_debugger_NMI = FALSE;
122 volatile boolean_t pmap_tlb_flush_timeout = FALSE;
123 decl_simple_lock_data(,mp_kdp_lock);
124
125 decl_lck_mtx_data(static, mp_cpu_boot_lock);
126 lck_mtx_ext_t mp_cpu_boot_lock_ext;
127
128 /* Variables needed for MP rendezvous. */
129 decl_simple_lock_data(,mp_rv_lock);
130 static void (*mp_rv_setup_func)(void *arg);
131 static void (*mp_rv_action_func)(void *arg);
132 static void (*mp_rv_teardown_func)(void *arg);
133 static void *mp_rv_func_arg;
134 static volatile int mp_rv_ncpus;
135 /* Cache-aligned barriers: */
136 static volatile long mp_rv_entry __attribute__((aligned(64)));
137 static volatile long mp_rv_exit __attribute__((aligned(64)));
138 static volatile long mp_rv_complete __attribute__((aligned(64)));
139
140 volatile uint64_t debugger_entry_time;
141 volatile uint64_t debugger_exit_time;
142 #if MACH_KDP
143 #include <kdp/kdp.h>
144 extern int kdp_snapshot;
145 static struct _kdp_xcpu_call_func {
146 kdp_x86_xcpu_func_t func;
147 void *arg0, *arg1;
148 volatile long ret;
149 volatile uint16_t cpu;
150 } kdp_xcpu_call_func = {
151 .cpu = KDP_XCPU_NONE
152 };
153
154 #endif
155
156 /* Variables needed for MP broadcast. */
157 static void (*mp_bc_action_func)(void *arg);
158 static void *mp_bc_func_arg;
159 static int mp_bc_ncpus;
160 static volatile long mp_bc_count;
161 decl_lck_mtx_data(static, mp_bc_lock);
162 lck_mtx_ext_t mp_bc_lock_ext;
163 static volatile int debugger_cpu = -1;
164 volatile long NMIPI_acks = 0;
165 volatile long NMI_count = 0;
166
167 extern void NMI_cpus(void);
168
169 static void mp_cpus_call_init(void);
170 static void mp_cpus_call_cpu_init(void);
171 static void mp_cpus_call_action(void);
172 static void mp_call_PM(void);
173
174 char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
175
176 /* PAL-related routines */
177 boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
178 int ipi_vector, i386_intr_func_t ipi_handler);
179 void i386_start_cpu(int lapic_id, int cpu_num);
180 void i386_send_NMI(int cpu);
181
182 #if GPROF
183 /*
184 * Initialize dummy structs for profiling. These aren't used, but they
185 * allow hertz_tick() to be built with GPROF defined.
186 */
187 struct profile_vars _profile_vars;
188 struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
189 #define GPROF_INIT() \
190 { \
191 int i; \
192 \
193 /* Hack to initialize pointers to unused profiling structs */ \
194 for (i = 1; i < MAX_CPUS; i++) \
195 _profile_vars_cpus[i] = &_profile_vars; \
196 }
197 #else
198 #define GPROF_INIT()
199 #endif /* GPROF */
200
201 static lck_grp_t smp_lck_grp;
202 static lck_grp_attr_t smp_lck_grp_attr;
203
204 #define NUM_CPU_WARM_CALLS 20
205 struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
206 queue_head_t cpu_warm_call_list;
207 decl_simple_lock_data(static, cpu_warm_lock);
208
209 typedef struct cpu_warm_data {
210 timer_call_t cwd_call;
211 uint64_t cwd_deadline;
212 int cwd_result;
213 } *cpu_warm_data_t;
214
215 static void cpu_prewarm_init(void);
216 static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
217 static void _cpu_warm_setup(void *arg);
218 static timer_call_t grab_warm_timer_call(void);
219 static void free_warm_timer_call(timer_call_t call);
220
221 void
222 smp_init(void)
223 {
224 simple_lock_init(&mp_kdp_lock, 0);
225 simple_lock_init(&mp_rv_lock, 0);
226 lck_grp_attr_setdefault(&smp_lck_grp_attr);
227 lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
228 lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
229 lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
230 console_init();
231
232 if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
233 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
234 return;
235
236 cpu_thread_init();
237
238 GPROF_INIT();
239 DBGLOG_CPU_INIT(master_cpu);
240
241 mp_cpus_call_init();
242 mp_cpus_call_cpu_init();
243
244 if (PE_parse_boot_argn("TSC_sync_margin",
245 &TSC_sync_margin, sizeof(TSC_sync_margin))) {
246 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
247 } else if (cpuid_vmm_present()) {
248 kprintf("TSC sync margin disabled\n");
249 TSC_sync_margin = 0;
250 }
251 smp_initialized = TRUE;
252
253 cpu_prewarm_init();
254
255 return;
256 }
257
258 typedef struct {
259 int target_cpu;
260 int target_lapic;
261 int starter_cpu;
262 } processor_start_info_t;
263 static processor_start_info_t start_info __attribute__((aligned(64)));
264
265 /*
266 * Cache-alignment is to avoid cross-cpu false-sharing interference.
267 */
268 static volatile long tsc_entry_barrier __attribute__((aligned(64)));
269 static volatile long tsc_exit_barrier __attribute__((aligned(64)));
270 static volatile uint64_t tsc_target __attribute__((aligned(64)));
271
272 /*
273 * Poll a CPU to see when it has marked itself as running.
274 */
275 static void
276 mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
277 {
278 while (iters-- > 0) {
279 if (cpu_datap(slot_num)->cpu_running)
280 break;
281 delay(usecdelay);
282 }
283 }
284
285 /*
286 * Quickly bring a halted CPU back online.
287 */
288 kern_return_t
289 intel_startCPU_fast(int slot_num)
290 {
291 kern_return_t rc;
292
293 /*
294 * Try to perform a fast restart
295 */
296 rc = pmCPUExitHalt(slot_num);
297 if (rc != KERN_SUCCESS)
298 /*
299 * The CPU was not eligible for a fast restart.
300 */
301 return(rc);
302
303 KERNEL_DEBUG_CONSTANT(
304 TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
305 slot_num, 0, 0, 0, 0);
306
307 /*
308 * Wait until the CPU is back online.
309 */
310 mp_disable_preemption();
311
312 /*
313 * We use short pauses (1us) for low latency. 30,000 iterations is
314 * longer than a full restart would require so it should be more
315 * than long enough.
316 */
317
318 mp_wait_for_cpu_up(slot_num, 30000, 1);
319 mp_enable_preemption();
320
321 KERNEL_DEBUG_CONSTANT(
322 TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
323 slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
324
325 /*
326 * Check to make sure that the CPU is really running. If not,
327 * go through the slow path.
328 */
329 if (cpu_datap(slot_num)->cpu_running)
330 return(KERN_SUCCESS);
331 else
332 return(KERN_FAILURE);
333 }
334
335 static void
336 started_cpu(void)
337 {
338 /* Here on the started cpu with cpu_running set TRUE */
339
340 if (TSC_sync_margin &&
341 start_info.target_cpu == cpu_number()) {
342 /*
343 * I've just started up; synchronize again with the starter cpu
344 * and then snap my TSC.
345 */
346 tsc_target = 0;
347 atomic_decl(&tsc_entry_barrier, 1);
348 while (tsc_entry_barrier != 0)
349 ; /* spin for starter and target at barrier */
350 tsc_target = rdtsc64();
351 atomic_decl(&tsc_exit_barrier, 1);
352 }
353 }
354
355 static void
356 start_cpu(void *arg)
357 {
358 int i = 1000;
359 processor_start_info_t *psip = (processor_start_info_t *) arg;
360
361 /* Ignore this if the current processor is not the starter */
362 if (cpu_number() != psip->starter_cpu)
363 return;
364
365 DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
366 arg, psip->target_cpu, psip->target_lapic);
367
368 KERNEL_DEBUG_CONSTANT(
369 TRACE_MP_CPU_START | DBG_FUNC_START,
370 psip->target_cpu,
371 psip->target_lapic, 0, 0, 0);
372
373 i386_start_cpu(psip->target_lapic, psip->target_cpu);
374
375 #ifdef POSTCODE_DELAY
376 /* Wait much longer if postcodes are displayed for a delay period. */
377 i *= 10000;
378 #endif
379 DBG("start_cpu(%p) about to wait for cpu %d\n",
380 arg, psip->target_cpu);
381
382 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
383
384 KERNEL_DEBUG_CONSTANT(
385 TRACE_MP_CPU_START | DBG_FUNC_END,
386 psip->target_cpu,
387 cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
388
389 if (TSC_sync_margin &&
390 cpu_datap(psip->target_cpu)->cpu_running) {
391 /*
392 * Compare the TSC from the started processor with ours.
393 * Report and log/panic if it diverges by more than
394 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
395 * can be overridden by boot-arg (with 0 meaning no checking).
396 */
397 uint64_t tsc_starter;
398 int64_t tsc_delta;
399 atomic_decl(&tsc_entry_barrier, 1);
400 while (tsc_entry_barrier != 0)
401 ; /* spin for both processors at barrier */
402 tsc_starter = rdtsc64();
403 atomic_decl(&tsc_exit_barrier, 1);
404 while (tsc_exit_barrier != 0)
405 ; /* spin for target to store its TSC */
406 tsc_delta = tsc_target - tsc_starter;
407 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
408 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
409 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
410 #if DEBUG
411 panic(
412 #else
413 printf(
414 #endif
415 "Unsynchronized TSC for cpu %d: "
416 "0x%016llx, delta 0x%llx\n",
417 psip->target_cpu, tsc_target, tsc_delta);
418 }
419 }
420 }
421
422 kern_return_t
423 intel_startCPU(
424 int slot_num)
425 {
426 int lapic = cpu_to_lapic[slot_num];
427 boolean_t istate;
428
429 assert(lapic != -1);
430
431 DBGLOG_CPU_INIT(slot_num);
432
433 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
434 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
435
436 /*
437 * Initialize (or re-initialize) the descriptor tables for this cpu.
438 * Propagate processor mode to slave.
439 */
440 cpu_desc_init64(cpu_datap(slot_num));
441
442 /* Serialize use of the slave boot stack, etc. */
443 lck_mtx_lock(&mp_cpu_boot_lock);
444
445 istate = ml_set_interrupts_enabled(FALSE);
446 if (slot_num == get_cpu_number()) {
447 ml_set_interrupts_enabled(istate);
448 lck_mtx_unlock(&mp_cpu_boot_lock);
449 return KERN_SUCCESS;
450 }
451
452 start_info.starter_cpu = cpu_number();
453 start_info.target_cpu = slot_num;
454 start_info.target_lapic = lapic;
455 tsc_entry_barrier = 2;
456 tsc_exit_barrier = 2;
457
458 /*
459 * Perform the processor startup sequence with all running
460 * processors rendezvous'ed. This is required during periods when
461 * the cache-disable bit is set for MTRR/PAT initialization.
462 */
463 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
464
465 start_info.target_cpu = 0;
466
467 ml_set_interrupts_enabled(istate);
468 lck_mtx_unlock(&mp_cpu_boot_lock);
469
470 if (!cpu_datap(slot_num)->cpu_running) {
471 kprintf("Failed to start CPU %02d\n", slot_num);
472 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
473 delay(1000000);
474 halt_cpu();
475 return KERN_SUCCESS;
476 } else {
477 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
478 return KERN_SUCCESS;
479 }
480 }
481
482 #if MP_DEBUG
483 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
484 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
485
486 MP_EVENT_NAME_DECL();
487
488 #endif /* MP_DEBUG */
489
490 int
491 cpu_signal_handler(x86_saved_state_t *regs)
492 {
493 #if !MACH_KDP
494 #pragma unused (regs)
495 #endif /* !MACH_KDP */
496 int my_cpu;
497 volatile int *my_word;
498
499 SCHED_STATS_IPI(current_processor());
500
501 my_cpu = cpu_number();
502 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
503 /* Store the initial set of signals for diagnostics. New
504 * signals could arrive while these are being processed
505 * so it's no more than a hint.
506 */
507
508 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
509
510 do {
511 #if MACH_KDP
512 if (i_bit(MP_KDP, my_word) && regs != NULL) {
513 DBGLOG(cpu_handle,my_cpu,MP_KDP);
514 i_bit_clear(MP_KDP, my_word);
515 /* Ensure that the i386_kernel_state at the base of the
516 * current thread's stack (if any) is synchronized with the
517 * context at the moment of the interrupt, to facilitate
518 * access through the debugger.
519 */
520 sync_iss_to_iks(regs);
521 if (pmsafe_debug && !kdp_snapshot)
522 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
523 mp_kdp_wait(TRUE, FALSE);
524 if (pmsafe_debug && !kdp_snapshot)
525 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
526 } else
527 #endif /* MACH_KDP */
528 if (i_bit(MP_TLB_FLUSH, my_word)) {
529 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
530 i_bit_clear(MP_TLB_FLUSH, my_word);
531 pmap_update_interrupt();
532 } else if (i_bit(MP_AST, my_word)) {
533 DBGLOG(cpu_handle,my_cpu,MP_AST);
534 i_bit_clear(MP_AST, my_word);
535 ast_check(cpu_to_processor(my_cpu));
536 } else if (i_bit(MP_RENDEZVOUS, my_word)) {
537 DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
538 i_bit_clear(MP_RENDEZVOUS, my_word);
539 mp_rendezvous_action();
540 } else if (i_bit(MP_BROADCAST, my_word)) {
541 DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
542 i_bit_clear(MP_BROADCAST, my_word);
543 mp_broadcast_action();
544 } else if (i_bit(MP_CHUD, my_word)) {
545 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
546 i_bit_clear(MP_CHUD, my_word);
547 chudxnu_cpu_signal_handler();
548 } else if (i_bit(MP_CALL, my_word)) {
549 DBGLOG(cpu_handle,my_cpu,MP_CALL);
550 i_bit_clear(MP_CALL, my_word);
551 mp_cpus_call_action();
552 } else if (i_bit(MP_CALL_PM, my_word)) {
553 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
554 i_bit_clear(MP_CALL_PM, my_word);
555 mp_call_PM();
556 }
557 } while (*my_word);
558
559 return 0;
560 }
561
562 static int
563 NMIInterruptHandler(x86_saved_state_t *regs)
564 {
565 void *stackptr;
566
567 if (panic_active() && !panicDebugging) {
568 if (pmsafe_debug)
569 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
570 for(;;)
571 cpu_pause();
572 }
573
574 atomic_incl(&NMIPI_acks, 1);
575 atomic_incl(&NMI_count, 1);
576 sync_iss_to_iks_unconditionally(regs);
577 __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
578
579 if (cpu_number() == debugger_cpu)
580 goto NMExit;
581
582 if (spinlock_timed_out) {
583 char pstr[192];
584 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
585 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
586 } else if (pmap_tlb_flush_timeout == TRUE) {
587 char pstr[128];
588 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
589 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
590 }
591
592 #if MACH_KDP
593 if (pmsafe_debug && !kdp_snapshot)
594 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
595 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
596 mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
597 if (pmsafe_debug && !kdp_snapshot)
598 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
599 #endif
600 NMExit:
601 return 1;
602 }
603
604
605 /*
606 * cpu_interrupt is really just to be used by the scheduler to
607 * get a CPU's attention; it may not always issue an IPI. If an
608 * IPI is always needed, use i386_cpu_IPI.
609 */
610 void
611 cpu_interrupt(int cpu)
612 {
613 boolean_t did_IPI = FALSE;
614
615 if (smp_initialized
616 && pmCPUExitIdle(cpu_datap(cpu))) {
617 i386_cpu_IPI(cpu);
618 did_IPI = TRUE;
619 }
620
621 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
622 }
623
624 /*
625 * Send a true NMI via the local APIC to the specified CPU.
626 */
627 void
628 cpu_NMI_interrupt(int cpu)
629 {
630 if (smp_initialized) {
631 i386_send_NMI(cpu);
632 }
633 }
634
635 void
636 NMI_cpus(void)
637 {
638 unsigned int cpu;
639 boolean_t intrs_enabled;
640 uint64_t tsc_timeout;
641
642 intrs_enabled = ml_set_interrupts_enabled(FALSE);
643
644 for (cpu = 0; cpu < real_ncpus; cpu++) {
645 if (!cpu_datap(cpu)->cpu_running)
646 continue;
647 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
648 cpu_NMI_interrupt(cpu);
649 tsc_timeout = !machine_timeout_suspended() ?
650 rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
651 ~0ULL;
652 while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
653 handle_pending_TLB_flushes();
654 cpu_pause();
655 if (rdtsc64() > tsc_timeout)
656 panic("NMI_cpus() timeout cpu %d", cpu);
657 }
658 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
659 }
660
661 ml_set_interrupts_enabled(intrs_enabled);
662 }
663
664 static void (* volatile mp_PM_func)(void) = NULL;
665
666 static void
667 mp_call_PM(void)
668 {
669 assert(!ml_get_interrupts_enabled());
670
671 if (mp_PM_func != NULL)
672 mp_PM_func();
673 }
674
675 void
676 cpu_PM_interrupt(int cpu)
677 {
678 assert(!ml_get_interrupts_enabled());
679
680 if (mp_PM_func != NULL) {
681 if (cpu == cpu_number())
682 mp_PM_func();
683 else
684 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
685 }
686 }
687
688 void
689 PM_interrupt_register(void (*fn)(void))
690 {
691 mp_PM_func = fn;
692 }
693
694 void
695 i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
696 {
697 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
698 uint64_t tsc_timeout;
699
700
701 if (!cpu_datap(cpu)->cpu_running)
702 return;
703
704 if (event == MP_TLB_FLUSH)
705 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
706
707 DBGLOG(cpu_signal, cpu, event);
708
709 i_bit_set(event, signals);
710 i386_cpu_IPI(cpu);
711 if (mode == SYNC) {
712 again:
713 tsc_timeout = !machine_timeout_suspended() ?
714 rdtsc64() + (1000*1000*1000) :
715 ~0ULL;
716 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
717 cpu_pause();
718 }
719 if (i_bit(event, signals)) {
720 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
721 cpu, event);
722 goto again;
723 }
724 }
725 if (event == MP_TLB_FLUSH)
726 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
727 }
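
/*
 * Illustrative sketch, not part of the original file: a typical caller
 * posts an event asynchronously to one remote cpu, for example to request
 * an AST check, much as cause_ast_check() does later in this file:
 *
 *      if (cpu != cpu_number())
 *              i386_signal_cpu(cpu, MP_AST, ASYNC);
 *
 * SYNC mode additionally spins (with a TSC-based timeout and retry) until
 * the target clears the event bit.
 */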
728
729 /*
730 * Send event to all running cpus.
731 * Called with the topology locked.
732 */
733 void
734 i386_signal_cpus(mp_event_t event, mp_sync_t mode)
735 {
736 unsigned int cpu;
737 unsigned int my_cpu = cpu_number();
738
739 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
740
741 for (cpu = 0; cpu < real_ncpus; cpu++) {
742 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
743 continue;
744 i386_signal_cpu(cpu, event, mode);
745 }
746 }
747
748 /*
749 * Return the number of running cpus.
750 * Called with the topology locked.
751 */
752 int
753 i386_active_cpus(void)
754 {
755 unsigned int cpu;
756 unsigned int ncpus = 0;
757
758 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
759
760 for (cpu = 0; cpu < real_ncpus; cpu++) {
761 if (cpu_datap(cpu)->cpu_running)
762 ncpus++;
763 }
764 return(ncpus);
765 }
766
767 /*
768 * Helper function called when busy-waiting: panic if too long
769 * a TSC-based time has elapsed since the start of the spin.
770 */
771 static void
772 mp_spin_timeout_check(uint64_t tsc_start, const char *msg)
773 {
774 uint64_t tsc_timeout;
775
776 cpu_pause();
777 if (machine_timeout_suspended())
778 return;
779
780 /*
781 * The timeout is 4 * the spinlock timeout period
782 * unless we have serial console printing (kprintf) enabled,
783 * in which case we allow an even greater margin.
784 */
785 tsc_timeout = disable_serial_output ? (uint64_t) LockTimeOutTSC << 2
786 : (uint64_t) LockTimeOutTSC << 4;
787 if (rdtsc64() > tsc_start + tsc_timeout)
788 panic("%s: spin timeout", msg);
789 }
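
/*
 * Illustrative sketch of the busy-wait pattern this helper supports (the
 * condition shown is a placeholder): callers snapshot the TSC once and then
 * re-check on every iteration, as mp_rendezvous_action() and
 * mp_cpus_call_wait() do below. Note that mp_spin_timeout_check() itself
 * issues the cpu_pause().
 *
 *      uint64_t tsc_spin_start = rdtsc64();
 *      while (!condition_we_are_waiting_for) {
 *              if (!intrs_enabled)
 *                      handle_pending_TLB_flushes();
 *              mp_spin_timeout_check(tsc_spin_start, "caller()");
 *      }
 */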
790
791 /*
792 * All-CPU rendezvous:
793 * - CPUs are signalled,
794 * - all execute the setup function (if specified),
795 * - rendezvous (i.e. all cpus reach a barrier),
796 * - all execute the action function (if specified),
797 * - rendezvous again,
798 * - execute the teardown function (if specified), and then
799 * - resume.
800 *
801 * Note that the supplied external functions _must_ be reentrant and aware
802 * that they are running in parallel and in an unknown lock context.
803 */
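
/*
 * Illustrative usage sketch (the callback name is hypothetical, not part of
 * this file): a caller that needs every running cpu to execute a routine
 * between the entry and exit barriers, with no special setup or teardown,
 * might write:
 *
 *      static void
 *      example_action(__unused void *arg)
 *      {
 *              ... runs on every running cpu, in parallel ...
 *      }
 *
 *      mp_rendezvous(NULL, example_action, NULL, NULL);
 *
 * When supplied, the setup and teardown functions bracket the action on
 * each cpu (see setup_disable_intrs()/teardown_restore_intrs() below).
 */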
804
805 static void
806 mp_rendezvous_action(void)
807 {
808 boolean_t intrs_enabled;
809 uint64_t tsc_spin_start;
810
811 /* setup function */
812 if (mp_rv_setup_func != NULL)
813 mp_rv_setup_func(mp_rv_func_arg);
814
815 intrs_enabled = ml_get_interrupts_enabled();
816
817 /* spin on entry rendezvous */
818 atomic_incl(&mp_rv_entry, 1);
819 tsc_spin_start = rdtsc64();
820 while (mp_rv_entry < mp_rv_ncpus) {
821 /* poll for pesky tlb flushes if interrupts disabled */
822 if (!intrs_enabled)
823 handle_pending_TLB_flushes();
824 mp_spin_timeout_check(tsc_spin_start,
825 "mp_rendezvous_action() entry");
826 }
827
828 /* action function */
829 if (mp_rv_action_func != NULL)
830 mp_rv_action_func(mp_rv_func_arg);
831
832 /* spin on exit rendezvous */
833 atomic_incl(&mp_rv_exit, 1);
834 tsc_spin_start = rdtsc64();
835 while (mp_rv_exit < mp_rv_ncpus) {
836 if (!intrs_enabled)
837 handle_pending_TLB_flushes();
838 mp_spin_timeout_check(tsc_spin_start,
839 "mp_rendezvous_action() exit");
840 }
841
842 /* teardown function */
843 if (mp_rv_teardown_func != NULL)
844 mp_rv_teardown_func(mp_rv_func_arg);
845
846 /* Bump completion count */
847 atomic_incl(&mp_rv_complete, 1);
848 }
849
850 void
851 mp_rendezvous(void (*setup_func)(void *),
852 void (*action_func)(void *),
853 void (*teardown_func)(void *),
854 void *arg)
855 {
856 uint64_t tsc_spin_start;
857
858 if (!smp_initialized) {
859 if (setup_func != NULL)
860 setup_func(arg);
861 if (action_func != NULL)
862 action_func(arg);
863 if (teardown_func != NULL)
864 teardown_func(arg);
865 return;
866 }
867
868 /* obtain rendezvous lock */
869 simple_lock(&mp_rv_lock);
870
871 /* set static function pointers */
872 mp_rv_setup_func = setup_func;
873 mp_rv_action_func = action_func;
874 mp_rv_teardown_func = teardown_func;
875 mp_rv_func_arg = arg;
876
877 mp_rv_entry = 0;
878 mp_rv_exit = 0;
879 mp_rv_complete = 0;
880
881 /*
882 * signal other processors, which will call mp_rendezvous_action()
883 * with interrupts disabled
884 */
885 simple_lock(&x86_topo_lock);
886 mp_rv_ncpus = i386_active_cpus();
887 i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
888 simple_unlock(&x86_topo_lock);
889
890 /* call executor function on this cpu */
891 mp_rendezvous_action();
892
893 /*
894 * Spin for everyone to complete.
895 * This is necessary to ensure that all processors have proceeded
896 * from the exit barrier before we release the rendezvous structure.
897 */
898 tsc_spin_start = rdtsc64();
899 while (mp_rv_complete < mp_rv_ncpus) {
900 mp_spin_timeout_check(tsc_spin_start, "mp_rendezvous()");
901 }
902
903 /* Tidy up */
904 mp_rv_setup_func = NULL;
905 mp_rv_action_func = NULL;
906 mp_rv_teardown_func = NULL;
907 mp_rv_func_arg = NULL;
908
909 /* release lock */
910 simple_unlock(&mp_rv_lock);
911 }
912
913 void
914 mp_rendezvous_break_lock(void)
915 {
916 simple_lock_init(&mp_rv_lock, 0);
917 }
918
919 static void
920 setup_disable_intrs(__unused void * param_not_used)
921 {
922 /* disable interrupts before the first barrier */
923 boolean_t intr = ml_set_interrupts_enabled(FALSE);
924
925 current_cpu_datap()->cpu_iflag = intr;
926 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
927 }
928
929 static void
930 teardown_restore_intrs(__unused void * param_not_used)
931 {
932 /* restore interrupt flag following MTRR changes */
933 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
934 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
935 }
936
937 /*
938 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
939 * This is exported for use by kexts.
940 */
941 void
942 mp_rendezvous_no_intrs(
943 void (*action_func)(void *),
944 void *arg)
945 {
946 mp_rendezvous(setup_disable_intrs,
947 action_func,
948 teardown_restore_intrs,
949 arg);
950 }
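
/*
 * Illustrative kext-style usage sketch (hypothetical names): code that must
 * run on all cpus with interrupts masked, such as an MTRR/PAT update, can
 * call the wrapper directly:
 *
 *      mp_rendezvous_no_intrs(example_mtrr_update_action, &example_state);
 *
 * intel_startCPU() above uses the same wrapper to rendezvous all running
 * cpus around the processor startup sequence.
 */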
951
952
953 typedef struct {
954 queue_chain_t link; /* queue linkage */
955 void (*func)(void *,void *); /* routine to call */
956 void *arg0; /* routine's 1st arg */
957 void *arg1; /* routine's 2nd arg */
958 volatile long *countp; /* completion counter */
959 } mp_call_t;
960
961
962 typedef struct {
963 queue_head_t queue;
964 decl_simple_lock_data(, lock);
965 } mp_call_queue_t;
966 #define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
967 static mp_call_queue_t mp_cpus_call_freelist;
968 static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
969
970 static inline boolean_t
971 mp_call_head_lock(mp_call_queue_t *cqp)
972 {
973 boolean_t intrs_enabled;
974
975 intrs_enabled = ml_set_interrupts_enabled(FALSE);
976 simple_lock(&cqp->lock);
977
978 return intrs_enabled;
979 }
980
981 static inline boolean_t
982 mp_call_head_is_locked(mp_call_queue_t *cqp)
983 {
984 return !ml_get_interrupts_enabled() &&
985 hw_lock_held((hw_lock_t)&cqp->lock);
986 }
987
988 static inline void
989 mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
990 {
991 simple_unlock(&cqp->lock);
992 ml_set_interrupts_enabled(intrs_enabled);
993 }
994
995 static inline mp_call_t *
996 mp_call_alloc(void)
997 {
998 mp_call_t *callp = NULL;
999 boolean_t intrs_enabled;
1000 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1001
1002 intrs_enabled = mp_call_head_lock(cqp);
1003 if (!queue_empty(&cqp->queue))
1004 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1005 mp_call_head_unlock(cqp, intrs_enabled);
1006
1007 return callp;
1008 }
1009
1010 static inline void
1011 mp_call_free(mp_call_t *callp)
1012 {
1013 boolean_t intrs_enabled;
1014 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1015
1016 intrs_enabled = mp_call_head_lock(cqp);
1017 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
1018 mp_call_head_unlock(cqp, intrs_enabled);
1019 }
1020
1021 static inline mp_call_t *
1022 mp_call_dequeue_locked(mp_call_queue_t *cqp)
1023 {
1024 mp_call_t *callp = NULL;
1025
1026 assert(mp_call_head_is_locked(cqp));
1027 if (!queue_empty(&cqp->queue))
1028 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1029 return callp;
1030 }
1031
1032 static inline void
1033 mp_call_enqueue_locked(
1034 mp_call_queue_t *cqp,
1035 mp_call_t *callp)
1036 {
1037 queue_enter(&cqp->queue, callp, typeof(callp), link);
1038 }
1039
1040 /* Called on the boot processor to initialize global structures */
1041 static void
1042 mp_cpus_call_init(void)
1043 {
1044 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1045
1046 DBG("mp_cpus_call_init()\n");
1047 simple_lock_init(&cqp->lock, 0);
1048 queue_init(&cqp->queue);
1049 }
1050
1051 /*
1052 * Called by each processor to add call buffers to the free list
1053 * and to initialize the per-cpu call queue.
1054 * Also called, but ignored, on slave processors on restart/wake.
1055 */
1056 static void
1057 mp_cpus_call_cpu_init(void)
1058 {
1059 int i;
1060 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu_number()];
1061 mp_call_t *callp;
1062
1063 if (cqp->queue.next != NULL)
1064 return; /* restart/wake case: called already */
1065
1066 simple_lock_init(&cqp->lock, 0);
1067 queue_init(&cqp->queue);
1068 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1069 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
1070 mp_call_free(callp);
1071 }
1072
1073 DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
1074 }
1075
1076 /*
1077 * This is called from cpu_signal_handler() to process an MP_CALL signal,
1078 * and also from i386_deactivate_cpu() when a cpu is being taken offline.
1079 */
1080 static void
1081 mp_cpus_call_action(void)
1082 {
1083 mp_call_queue_t *cqp;
1084 boolean_t intrs_enabled;
1085 mp_call_t *callp;
1086 mp_call_t call;
1087
1088 assert(!ml_get_interrupts_enabled());
1089 cqp = &mp_cpus_call_head[cpu_number()];
1090 intrs_enabled = mp_call_head_lock(cqp);
1091 while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
1092 /* Copy call request to the stack to free buffer */
1093 call = *callp;
1094 mp_call_free(callp);
1095 if (call.func != NULL) {
1096 mp_call_head_unlock(cqp, intrs_enabled);
1097 KERNEL_DEBUG_CONSTANT(
1098 TRACE_MP_CPUS_CALL_ACTION,
1099 call.func, call.arg0, call.arg1, call.countp, 0);
1100 call.func(call.arg0, call.arg1);
1101 (void) mp_call_head_lock(cqp);
1102 }
1103 if (call.countp != NULL)
1104 atomic_incl(call.countp, 1);
1105 }
1106 mp_call_head_unlock(cqp, intrs_enabled);
1107 }
1108
1109 /*
1110 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1111 * Possible modes are:
1112 * SYNC: function is called serially on target cpus in logical cpu order
1113 * waiting for each call to be acknowledged before proceeding
1114 * ASYNC: function call is queued to the specified cpus
1115 * waiting for all calls to complete in parallel before returning
1116 * NOSYNC: function calls are queued
1117 * but we return before confirmation of calls completing.
1118 * The action function may be NULL.
1119 * The cpu mask may include the local cpu. Offline cpus are ignored.
1120 * The return value is the number of cpus on which the call was made or queued.
1121 */
1122 cpu_t
1123 mp_cpus_call(
1124 cpumask_t cpus,
1125 mp_sync_t mode,
1126 void (*action_func)(void *),
1127 void *arg)
1128 {
1129 return mp_cpus_call1(
1130 cpus,
1131 mode,
1132 (void (*)(void *,void *))action_func,
1133 arg,
1134 NULL,
1135 NULL,
1136 NULL);
1137 }
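
/*
 * Illustrative usage sketch (hypothetical callback name): run a function on
 * the boot cpu and wait for it to complete before returning:
 *
 *      cpu_t n = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC,
 *                             example_func, example_arg);
 *
 * i386_deactivate_cpu() below uses the ASYNC form to poke the master cpu's
 * timer queue, and ml_interrupt_prewarm() uses the SYNC form. The return
 * value is the number of cpus on which the call was made or queued.
 */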
1138
1139 static void
1140 mp_cpus_call_wait(boolean_t intrs_enabled,
1141 long mp_cpus_signals,
1142 volatile long *mp_cpus_calls)
1143 {
1144 mp_call_queue_t *cqp;
1145 uint64_t tsc_spin_start;
1146
1147 cqp = &mp_cpus_call_head[cpu_number()];
1148
1149 tsc_spin_start = rdtsc64();
1150 while (*mp_cpus_calls < mp_cpus_signals) {
1151 if (!intrs_enabled) {
1152 /* Sniffing w/o locking */
1153 if (!queue_empty(&cqp->queue))
1154 mp_cpus_call_action();
1155 handle_pending_TLB_flushes();
1156 }
1157 mp_spin_timeout_check(tsc_spin_start, "mp_cpus_call_wait()");
1158 }
1159 }
1160
1161 cpu_t
1162 mp_cpus_call1(
1163 cpumask_t cpus,
1164 mp_sync_t mode,
1165 void (*action_func)(void *, void *),
1166 void *arg0,
1167 void *arg1,
1168 cpumask_t *cpus_calledp,
1169 cpumask_t *cpus_notcalledp)
1170 {
1171 cpu_t cpu;
1172 boolean_t intrs_enabled = FALSE;
1173 boolean_t call_self = FALSE;
1174 cpumask_t cpus_called = 0;
1175 cpumask_t cpus_notcalled = 0;
1176 long mp_cpus_signals = 0;
1177 volatile long mp_cpus_calls = 0;
1178 uint64_t tsc_spin_start;
1179
1180 KERNEL_DEBUG_CONSTANT(
1181 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
1182 cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
1183
1184 if (!smp_initialized) {
1185 if ((cpus & CPUMASK_SELF) == 0)
1186 goto out;
1187 if (action_func != NULL) {
1188 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1189 action_func(arg0, arg1);
1190 ml_set_interrupts_enabled(intrs_enabled);
1191 }
1192 call_self = TRUE;
1193 goto out;
1194 }
1195
1196 /*
1197 * Queue the call for each non-local requested cpu.
1198 * The topo lock is not taken. Instead we sniff the cpu_running state
1199 * and then re-check it after taking the call lock. A cpu being taken
1200 * offline runs the action function after clearing its cpu_running flag.
1201 */
1202 mp_disable_preemption(); /* interrupts may be enabled */
1203 tsc_spin_start = rdtsc64();
1204 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1205 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1206 !cpu_datap(cpu)->cpu_running)
1207 continue;
1208 if (cpu == (cpu_t) cpu_number()) {
1209 /*
1210 * We don't IPI ourselves and, if calling asynchronously,
1211 * we defer our call until we have signalled all others.
1212 */
1213 call_self = TRUE;
1214 cpus_called |= cpu_to_cpumask(cpu);
1215 if (mode == SYNC && action_func != NULL) {
1216 KERNEL_DEBUG_CONSTANT(
1217 TRACE_MP_CPUS_CALL_LOCAL,
1218 VM_KERNEL_UNSLIDE(action_func),
1219 arg0, arg1, 0, 0);
1220 action_func(arg0, arg1);
1221 }
1222 } else {
1223 /*
1224 * Here to queue a call to cpu and IPI.
1225 * Spinning for request buffer unless NOSYNC.
1226 */
1227 mp_call_t *callp = NULL;
1228 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1229
1230 queue_call:
1231 if (callp == NULL)
1232 callp = mp_call_alloc();
1233 intrs_enabled = mp_call_head_lock(cqp);
1234 if (!cpu_datap(cpu)->cpu_running) {
1235 mp_call_head_unlock(cqp, intrs_enabled);
1236 continue;
1237 }
1238 if (mode == NOSYNC) {
1239 if (callp == NULL) {
1240 cpus_notcalled |= cpu_to_cpumask(cpu);
1241 mp_call_head_unlock(cqp, intrs_enabled);
1242 KERNEL_DEBUG_CONSTANT(
1243 TRACE_MP_CPUS_CALL_NOBUF,
1244 cpu, 0, 0, 0, 0);
1245 continue;
1246 }
1247 callp->countp = NULL;
1248 } else {
1249 if (callp == NULL) {
1250 mp_call_head_unlock(cqp, intrs_enabled);
1251 KERNEL_DEBUG_CONSTANT(
1252 TRACE_MP_CPUS_CALL_NOBUF,
1253 cpu, 0, 0, 0, 0);
1254 if (!intrs_enabled) {
1255 /* Sniffing w/o locking */
1256 if (!queue_empty(&cqp->queue))
1257 mp_cpus_call_action();
1258 handle_pending_TLB_flushes();
1259 }
1260 mp_spin_timeout_check(
1261 tsc_spin_start,
1262 "mp_cpus_call1()");
1263 goto queue_call;
1264 }
1265 callp->countp = &mp_cpus_calls;
1266 }
1267 callp->func = action_func;
1268 callp->arg0 = arg0;
1269 callp->arg1 = arg1;
1270 mp_call_enqueue_locked(cqp, callp);
1271 mp_cpus_signals++;
1272 cpus_called |= cpu_to_cpumask(cpu);
1273 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1274 mp_call_head_unlock(cqp, intrs_enabled);
1275 if (mode == SYNC) {
1276 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1277 }
1278 }
1279 }
1280
1281 /* Call locally if mode not SYNC */
1282 if (mode != SYNC && call_self ) {
1283 KERNEL_DEBUG_CONSTANT(
1284 TRACE_MP_CPUS_CALL_LOCAL,
1285 VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
1286 if (action_func != NULL) {
1287 ml_set_interrupts_enabled(FALSE);
1288 action_func(arg0, arg1);
1289 ml_set_interrupts_enabled(intrs_enabled);
1290 }
1291 }
1292
1293 /* Safe to allow pre-emption now */
1294 mp_enable_preemption();
1295
1296 /* For ASYNC, now wait for all signaled cpus to complete their calls */
1297 if (mode == ASYNC) {
1298 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1299 }
1300
1301 out:
1302 cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);
1303
1304 if (cpus_calledp)
1305 *cpus_calledp = cpus_called;
1306 if (cpus_notcalledp)
1307 *cpus_notcalledp = cpus_notcalled;
1308
1309 KERNEL_DEBUG_CONSTANT(
1310 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
1311 cpu, cpus_called, cpus_notcalled, 0, 0);
1312
1313 return cpu;
1314 }
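
/*
 * Illustrative sketch (hypothetical names): a NOSYNC caller that wants to
 * know which cpus could not be queued (because no call buffer was
 * available) can collect the optional masks:
 *
 *      cpumask_t called = 0, notcalled = 0;
 *      (void) mp_cpus_call1(cpus, NOSYNC, example_func, arg0, arg1,
 *                           &called, &notcalled);
 *      ... fall back or retry on the cpus left in 'notcalled' ...
 */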
1315
1316
1317 static void
1318 mp_broadcast_action(void)
1319 {
1320 /* call action function */
1321 if (mp_bc_action_func != NULL)
1322 mp_bc_action_func(mp_bc_func_arg);
1323
1324 /* if we're the last one through, wake up the instigator */
1325 if (atomic_decl_and_test(&mp_bc_count, 1))
1326 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1327 }
1328
1329 /*
1330 * mp_broadcast() runs a given function on all active cpus.
1331 * The caller blocks until the function has run on all cpus.
1332 * The caller will also block if there is another pending broadcast.
1333 */
1334 void
1335 mp_broadcast(
1336 void (*action_func)(void *),
1337 void *arg)
1338 {
1339 if (!smp_initialized) {
1340 if (action_func != NULL)
1341 action_func(arg);
1342 return;
1343 }
1344
1345 /* obtain broadcast lock */
1346 lck_mtx_lock(&mp_bc_lock);
1347
1348 /* set static function pointers */
1349 mp_bc_action_func = action_func;
1350 mp_bc_func_arg = arg;
1351
1352 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
1353
1354 /*
1355 * signal other processors, which will call mp_broadcast_action()
1356 */
1357 simple_lock(&x86_topo_lock);
1358 mp_bc_ncpus = i386_active_cpus(); /* total including this cpu */
1359 mp_bc_count = mp_bc_ncpus;
1360 i386_signal_cpus(MP_BROADCAST, ASYNC);
1361
1362 /* call executor function on this cpu */
1363 mp_broadcast_action();
1364 simple_unlock(&x86_topo_lock);
1365
1366 /* block for all cpus to have run action_func */
1367 if (mp_bc_ncpus > 1)
1368 thread_block(THREAD_CONTINUE_NULL);
1369 else
1370 clear_wait(current_thread(), THREAD_AWAKENED);
1371
1372 /* release lock */
1373 lck_mtx_unlock(&mp_bc_lock);
1374 }
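
/*
 * Illustrative usage sketch (hypothetical names): run a routine once on
 * every active cpu and block until all of them have finished:
 *
 *      mp_broadcast(example_percpu_init, &example_arg);
 *
 * Because the caller may block on mp_bc_lock and on the completion count,
 * this is not suitable for interrupt context or for callers holding
 * spinlocks.
 */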
1375
1376 void
1377 i386_activate_cpu(void)
1378 {
1379 cpu_data_t *cdp = current_cpu_datap();
1380
1381 assert(!ml_get_interrupts_enabled());
1382
1383 if (!smp_initialized) {
1384 cdp->cpu_running = TRUE;
1385 return;
1386 }
1387
1388 simple_lock(&x86_topo_lock);
1389 cdp->cpu_running = TRUE;
1390 started_cpu();
1391 simple_unlock(&x86_topo_lock);
1392 flush_tlb_raw();
1393 }
1394
1395 void
1396 i386_deactivate_cpu(void)
1397 {
1398 cpu_data_t *cdp = current_cpu_datap();
1399
1400 assert(!ml_get_interrupts_enabled());
1401
1402 KERNEL_DEBUG_CONSTANT(
1403 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
1404 0, 0, 0, 0, 0);
1405
1406 simple_lock(&x86_topo_lock);
1407 cdp->cpu_running = FALSE;
1408 simple_unlock(&x86_topo_lock);
1409
1410 /*
1411 * Move all of this cpu's timers to the master/boot cpu,
1412 * and poke it in case there's a sooner deadline for it to schedule.
1413 */
1414 timer_queue_shutdown(&cdp->rtclock_timer.queue);
1415 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);
1416
1417 /*
1418 * Open an interrupt window
1419 * and ensure any pending IPI or timer is serviced
1420 */
1421 mp_disable_preemption();
1422 ml_set_interrupts_enabled(TRUE);
1423
1424 while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
1425 cpu_pause();
1426 /*
1427 * Ensure there's no remaining timer deadline set
1428 * - AICPM may have left one active.
1429 */
1430 setPop(0);
1431
1432 ml_set_interrupts_enabled(FALSE);
1433 mp_enable_preemption();
1434
1435 KERNEL_DEBUG_CONSTANT(
1436 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
1437 0, 0, 0, 0, 0);
1438 }
1439
1440 int pmsafe_debug = 1;
1441
1442 #if MACH_KDP
1443 volatile boolean_t mp_kdp_trap = FALSE;
1444 volatile unsigned long mp_kdp_ncpus;
1445 boolean_t mp_kdp_state;
1446
1447
1448 void
1449 mp_kdp_enter(void)
1450 {
1451 unsigned int cpu;
1452 unsigned int ncpus = 0;
1453 unsigned int my_cpu;
1454 uint64_t tsc_timeout;
1455
1456 DBG("mp_kdp_enter()\n");
1457
1458 #if DEBUG
1459 if (!smp_initialized)
1460 simple_lock_init(&mp_kdp_lock, 0);
1461 #endif
1462
1463 /*
1464 * Here to enter the debugger.
1465 * In case of races, only one cpu is allowed to enter kdp after
1466 * stopping others.
1467 */
1468 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1469 my_cpu = cpu_number();
1470
1471 if (my_cpu == (unsigned) debugger_cpu) {
1472 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1473 kdp_reset();
1474 return;
1475 }
1476
1477 cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1478 simple_lock(&mp_kdp_lock);
1479
1480 if (pmsafe_debug && !kdp_snapshot)
1481 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1482
1483 while (mp_kdp_trap) {
1484 simple_unlock(&mp_kdp_lock);
1485 DBG("mp_kdp_enter() race lost\n");
1486 #if MACH_KDP
1487 mp_kdp_wait(TRUE, FALSE);
1488 #endif
1489 simple_lock(&mp_kdp_lock);
1490 }
1491 debugger_cpu = my_cpu;
1492 ncpus = 1;
1493 mp_kdp_ncpus = 1; /* self */
1494 mp_kdp_trap = TRUE;
1495 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1496 simple_unlock(&mp_kdp_lock);
1497
1498 /*
1499 * Deliver a nudge to other cpus, counting how many
1500 */
1501 DBG("mp_kdp_enter() signaling other processors\n");
1502 if (force_immediate_debugger_NMI == FALSE) {
1503 for (cpu = 0; cpu < real_ncpus; cpu++) {
1504 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1505 continue;
1506 ncpus++;
1507 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1508 }
1509 /*
1510 * Wait for other processors to synchronize
1511 */
1512 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1513
1514 /*
1515 * This timeout is rather arbitrary; we don't want to NMI
1516 * processors that are executing at potentially
1517 * "unsafe-to-interrupt" points such as the trampolines,
1518 * but neither do we want to lose state by waiting too long.
1519 */
1520 tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);
1521
1522 if (virtualized)
1523 tsc_timeout = ~0ULL;
1524
1525 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1526 /*
1527 * A TLB shootdown request may be pending--this would
1528 * result in the requesting processor waiting in
1529 * PMAP_UPDATE_TLBS() until this processor deals with it.
1530 * Process it, so it can now enter mp_kdp_wait()
1531 */
1532 handle_pending_TLB_flushes();
1533 cpu_pause();
1534 }
1535 /* If we've timed out, and some processor(s) are still unresponsive,
1536 * interrupt them with an NMI via the local APIC.
1537 */
1538 if (mp_kdp_ncpus != ncpus) {
1539 for (cpu = 0; cpu < real_ncpus; cpu++) {
1540 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1541 continue;
1542 if (cpu_signal_pending(cpu, MP_KDP))
1543 cpu_NMI_interrupt(cpu);
1544 }
1545 }
1546 }
1547 else
1548 for (cpu = 0; cpu < real_ncpus; cpu++) {
1549 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1550 continue;
1551 cpu_NMI_interrupt(cpu);
1552 }
1553
1554 DBG("mp_kdp_enter() %d processors done %s\n",
1555 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1556
1557 postcode(MP_KDP_ENTER);
1558 }
1559
1560 static boolean_t
1561 cpu_signal_pending(int cpu, mp_event_t event)
1562 {
1563 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1564 boolean_t retval = FALSE;
1565
1566 if (i_bit(event, signals))
1567 retval = TRUE;
1568 return retval;
1569 }
1570
1571 long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1572 void *arg0, void *arg1)
1573 {
1574 if (lcpu > (real_ncpus - 1))
1575 return -1;
1576
1577 if (func == NULL)
1578 return -1;
1579
1580 kdp_xcpu_call_func.func = func;
1581 kdp_xcpu_call_func.ret = -1;
1582 kdp_xcpu_call_func.arg0 = arg0;
1583 kdp_xcpu_call_func.arg1 = arg1;
1584 kdp_xcpu_call_func.cpu = lcpu;
1585 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1586 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1587 cpu_pause();
1588 return kdp_xcpu_call_func.ret;
1589 }
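
/*
 * Illustrative sketch (hypothetical names): while the other cpus are held
 * in mp_kdp_wait(), the debugger can cross-call a function on a specific
 * cpu; the target runs it from kdp_x86_xcpu_poll() and the return value is
 * handed back to the invoking cpu:
 *
 *      long ret = kdp_x86_xcpu_invoke(target_lcpu, example_func, arg0, arg1);
 *
 * example_func is invoked as example_func(arg0, arg1, <target cpu number>).
 */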
1590
1591 static void
1592 kdp_x86_xcpu_poll(void)
1593 {
1594 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1595 kdp_xcpu_call_func.ret =
1596 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1597 kdp_xcpu_call_func.arg1,
1598 cpu_number());
1599 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1600 }
1601 }
1602
1603 static void
1604 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
1605 {
1606 DBG("mp_kdp_wait()\n");
1607 /* If an I/O port has been specified as a debugging aid, issue a read */
1608 panic_io_port_read();
1609 current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
1610 #if CONFIG_MCA
1611 /* If we've trapped due to a machine-check, save MCA registers */
1612 mca_check_save();
1613 #endif
1614
1615 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1616 while (mp_kdp_trap || (isNMI == TRUE)) {
1617 /*
1618 * A TLB shootdown request may be pending--this would result
1619 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1620 * until this processor handles it.
1621 * Process it, so it can now enter mp_kdp_wait()
1622 */
1623 if (flush)
1624 handle_pending_TLB_flushes();
1625
1626 kdp_x86_xcpu_poll();
1627 cpu_pause();
1628 }
1629
1630 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1631 DBG("mp_kdp_wait() done\n");
1632 }
1633
1634 void
1635 mp_kdp_exit(void)
1636 {
1637 DBG("mp_kdp_exit()\n");
1638 debugger_cpu = -1;
1639 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1640
1641 debugger_exit_time = mach_absolute_time();
1642
1643 mp_kdp_trap = FALSE;
1644 mfence();
1645
1646 /* Wait for other processors to stop spinning. XXX needs timeout */
1647 DBG("mp_kdp_exit() waiting for processors to resume\n");
1648 while (mp_kdp_ncpus > 0) {
1649 /*
1650 * a TLB shootdown request may be pending... this would result in the requesting
1651 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1652 * Process it, so it can now enter mp_kdp_wait()
1653 */
1654 handle_pending_TLB_flushes();
1655
1656 cpu_pause();
1657 }
1658
1659 if (pmsafe_debug && !kdp_snapshot)
1660 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1661
1662 debugger_exit_time = mach_absolute_time();
1663
1664 DBG("mp_kdp_exit() done\n");
1665 (void) ml_set_interrupts_enabled(mp_kdp_state);
1666 postcode(0);
1667 }
1668 #endif /* MACH_KDP */
1669
1670 boolean_t
1671 mp_recent_debugger_activity() {
1672 uint64_t abstime = mach_absolute_time();
1673 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1674 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
1675 }
1676
1677 /*ARGSUSED*/
1678 void
1679 init_ast_check(
1680 __unused processor_t processor)
1681 {
1682 }
1683
1684 void
1685 cause_ast_check(
1686 processor_t processor)
1687 {
1688 int cpu = processor->cpu_id;
1689
1690 if (cpu != cpu_number()) {
1691 i386_signal_cpu(cpu, MP_AST, ASYNC);
1692 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1693 }
1694 }
1695
1696 void
1697 slave_machine_init(void *param)
1698 {
1699 /*
1700 * Here in process context, but with interrupts disabled.
1701 */
1702 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1703
1704 if (param == FULL_SLAVE_INIT) {
1705 /*
1706 * Cold start
1707 */
1708 clock_init();
1709 cpu_machine_init(); /* Interrupts enabled hereafter */
1710 mp_cpus_call_cpu_init();
1711 } else {
1712 cpu_machine_init(); /* Interrupts enabled hereafter */
1713 }
1714 }
1715
1716 #undef cpu_number
1717 int cpu_number(void)
1718 {
1719 return get_cpu_number();
1720 }
1721
1722 static void
1723 cpu_prewarm_init()
1724 {
1725 int i;
1726
1727 simple_lock_init(&cpu_warm_lock, 0);
1728 queue_init(&cpu_warm_call_list);
1729 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1730 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1731 }
1732 }
1733
1734 static timer_call_t
1735 grab_warm_timer_call()
1736 {
1737 spl_t x;
1738 timer_call_t call = NULL;
1739
1740 x = splsched();
1741 simple_lock(&cpu_warm_lock);
1742 if (!queue_empty(&cpu_warm_call_list)) {
1743 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1744 }
1745 simple_unlock(&cpu_warm_lock);
1746 splx(x);
1747
1748 return call;
1749 }
1750
1751 static void
1752 free_warm_timer_call(timer_call_t call)
1753 {
1754 spl_t x;
1755
1756 x = splsched();
1757 simple_lock(&cpu_warm_lock);
1758 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1759 simple_unlock(&cpu_warm_lock);
1760 splx(x);
1761 }
1762
1763 /*
1764 * Runs in timer call context (interrupts disabled).
1765 */
1766 static void
1767 cpu_warm_timer_call_func(
1768 call_entry_param_t p0,
1769 __unused call_entry_param_t p1)
1770 {
1771 free_warm_timer_call((timer_call_t)p0);
1772 return;
1773 }
1774
1775 /*
1776 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1777 */
1778 static void
1779 _cpu_warm_setup(
1780 void *arg)
1781 {
1782 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1783
1784 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
1785 cwdp->cwd_result = 0;
1786
1787 return;
1788 }
1789
1790 /*
1791 * Not safe to call with interrupts disabled.
1792 */
1793 kern_return_t
1794 ml_interrupt_prewarm(
1795 uint64_t deadline)
1796 {
1797 struct cpu_warm_data cwd;
1798 timer_call_t call;
1799 cpu_t ct;
1800
1801 if (ml_get_interrupts_enabled() == FALSE) {
1802 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1803 }
1804
1805 /*
1806 * If the platform doesn't need our help, say that we succeeded.
1807 */
1808 if (!ml_get_interrupt_prewake_applicable()) {
1809 return KERN_SUCCESS;
1810 }
1811
1812 /*
1813 * Grab a timer call to use.
1814 */
1815 call = grab_warm_timer_call();
1816 if (call == NULL) {
1817 return KERN_RESOURCE_SHORTAGE;
1818 }
1819
1820 timer_call_setup(call, cpu_warm_timer_call_func, call);
1821 cwd.cwd_call = call;
1822 cwd.cwd_deadline = deadline;
1823 cwd.cwd_result = 0;
1824
1825 /*
1826 * For now, non-local interrupts happen on the master processor.
1827 */
1828 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1829 if (ct == 0) {
1830 free_warm_timer_call(call);
1831 return KERN_FAILURE;
1832 } else {
1833 return cwd.cwd_result;
1834 }
1835 }
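
/*
 * Illustrative usage sketch (assumed caller, not part of this file): a
 * platform driver expecting an interrupt at a known future time could warm
 * the master cpu by arming the prewarm timer at that absolute deadline:
 *
 *      uint64_t abs_interval, deadline;
 *      nanoseconds_to_absolutetime(500 * NSEC_PER_USEC, &abs_interval);
 *      deadline = mach_absolute_time() + abs_interval;
 *      (void) ml_interrupt_prewarm(deadline);
 *
 * ml_interrupt_prewarm() panics if called with interrupts disabled, and
 * returns KERN_SUCCESS immediately if the platform does not need prewarming.
 */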