1 /*
2 *
3 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 *
5 * This file contains Original Code and/or Modifications of Original Code
6 * as defined in and that are subject to the Apple Public Source License
7 * Version 2.0 (the 'License'). You may not use this file except in
8 * compliance with the License. The rights granted to you under the License
9 * may not be used to create, or enable the creation or redistribution of,
10 * unlawful or unlicensed copies of an Apple operating system, or to
11 * circumvent, violate, or enable the circumvention or violation of, any
12 * terms of an Apple operating system software license agreement.
13 *
14 * Please obtain a copy of the License at
15 * http://www.opensource.apple.com/apsl/ and read it before using this file.
16 *
17 * The Original Code and all software distributed under the License are
18 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 * Please see the License for the specific language governing rights and
23 * limitations under the License.
24 *
25 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 */
27 /*
28 * @OSF_COPYRIGHT@
29 */
30
31 #include <mach_rt.h>
32 #include <mach_kdp.h>
33 #include <mach_ldebug.h>
34 #include <gprof.h>
35
36 #include <mach/mach_types.h>
37 #include <mach/kern_return.h>
38
39 #include <kern/kern_types.h>
40 #include <kern/startup.h>
41 #include <kern/timer_queue.h>
42 #include <kern/processor.h>
43 #include <kern/cpu_number.h>
44 #include <kern/cpu_data.h>
45 #include <kern/assert.h>
46 #include <kern/machine.h>
47 #include <kern/pms.h>
48 #include <kern/misc_protos.h>
49 #include <kern/etimer.h>
50 #include <kern/kalloc.h>
51 #include <kern/queue.h>
52
53 #include <vm/vm_map.h>
54 #include <vm/vm_kern.h>
55
56 #include <profiling/profile-mk.h>
57
58 #include <i386/proc_reg.h>
59 #include <i386/cpu_threads.h>
60 #include <i386/mp_desc.h>
61 #include <i386/misc_protos.h>
62 #include <i386/trap.h>
63 #include <i386/postcode.h>
64 #include <i386/machine_routines.h>
65 #include <i386/mp.h>
66 #include <i386/mp_events.h>
67 #include <i386/lapic.h>
68 #include <i386/cpuid.h>
69 #include <i386/fpu.h>
70 #include <i386/machine_cpu.h>
71 #include <i386/pmCPU.h>
72 #if CONFIG_MCA
73 #include <i386/machine_check.h>
74 #endif
75 #include <i386/acpi.h>
76
77 #include <chud/chud_xnu.h>
78 #include <chud/chud_xnu_private.h>
79
80 #include <sys/kdebug.h>
81
82 #if MP_DEBUG
83 #define PAUSE delay(1000000)
84 #define DBG(x...) kprintf(x)
85 #else
86 #define DBG(x...)
87 #define PAUSE
88 #endif /* MP_DEBUG */
89
90 /* Debugging/test trace events: */
91 #define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
92 #define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
93 #define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
94 #define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
95 #define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
96 #define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
97 #define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
98 #define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
99
100 #define ABS(v) (((v) > 0)?(v):-(v))
101
102 void slave_boot_init(void);
103 void i386_cpu_IPI(int cpu);
104
105 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
106 static void mp_rendezvous_action(void);
107 static void mp_broadcast_action(void);
108
109 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
110 static int NMIInterruptHandler(x86_saved_state_t *regs);
111
112 boolean_t smp_initialized = FALSE;
113 uint32_t TSC_sync_margin = 0xFFF;
114 volatile boolean_t force_immediate_debugger_NMI = FALSE;
115 volatile boolean_t pmap_tlb_flush_timeout = FALSE;
116 decl_simple_lock_data(,mp_kdp_lock);
117
118 decl_lck_mtx_data(static, mp_cpu_boot_lock);
119 lck_mtx_ext_t mp_cpu_boot_lock_ext;
120
121 /* Variables needed for MP rendezvous. */
122 decl_simple_lock_data(,mp_rv_lock);
123 static void (*mp_rv_setup_func)(void *arg);
124 static void (*mp_rv_action_func)(void *arg);
125 static void (*mp_rv_teardown_func)(void *arg);
126 static void *mp_rv_func_arg;
127 static volatile int mp_rv_ncpus;
128 /* Cache-aligned barriers: */
129 static volatile long mp_rv_entry __attribute__((aligned(64)));
130 static volatile long mp_rv_exit __attribute__((aligned(64)));
131 static volatile long mp_rv_complete __attribute__((aligned(64)));
132
133 volatile uint64_t debugger_entry_time;
134 volatile uint64_t debugger_exit_time;
135 #if MACH_KDP
136 #include <kdp/kdp.h>
137 extern int kdp_snapshot;
138 static struct _kdp_xcpu_call_func {
139 kdp_x86_xcpu_func_t func;
140 void *arg0, *arg1;
141 volatile long ret;
142 volatile uint16_t cpu;
143 } kdp_xcpu_call_func = {
144 .cpu = KDP_XCPU_NONE
145 };
146
147 #endif
148
149 /* Variables needed for MP broadcast. */
150 static void (*mp_bc_action_func)(void *arg);
151 static void *mp_bc_func_arg;
152 static int mp_bc_ncpus;
153 static volatile long mp_bc_count;
154 decl_lck_mtx_data(static, mp_bc_lock);
155 lck_mtx_ext_t mp_bc_lock_ext;
156 static volatile int debugger_cpu = -1;
157 volatile long NMIPI_acks = 0;
158
159 static void mp_cpus_call_init(void);
160 static void mp_cpus_call_cpu_init(void);
161 static void mp_cpus_call_action(void);
162 static void mp_call_PM(void);
163
164 char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
165
166 /* PAL-related routines */
167 boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
168 int ipi_vector, i386_intr_func_t ipi_handler);
169 void i386_start_cpu(int lapic_id, int cpu_num);
170 void i386_send_NMI(int cpu);
171
172 #if GPROF
173 /*
174 * Initialize dummy structs for profiling. These aren't used but
175 * allow hertz_tick() to be built with GPROF defined.
176 */
177 struct profile_vars _profile_vars;
178 struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
179 #define GPROF_INIT() \
180 { \
181 int i; \
182 \
183 /* Hack to initialize pointers to unused profiling structs */ \
184 for (i = 1; i < MAX_CPUS; i++) \
185 _profile_vars_cpus[i] = &_profile_vars; \
186 }
187 #else
188 #define GPROF_INIT()
189 #endif /* GPROF */
190
191 static lck_grp_t smp_lck_grp;
192 static lck_grp_attr_t smp_lck_grp_attr;
193
194 #define NUM_CPU_WARM_CALLS 20
195 struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
196 queue_head_t cpu_warm_call_list;
197 decl_simple_lock_data(static, cpu_warm_lock);
198
199 typedef struct cpu_warm_data {
200 timer_call_t cwd_call;
201 uint64_t cwd_deadline;
202 int cwd_result;
203 } *cpu_warm_data_t;
204
205 static void cpu_prewarm_init(void);
206 static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
207 static void _cpu_warm_setup(void *arg);
208 static timer_call_t grab_warm_timer_call(void);
209 static void free_warm_timer_call(timer_call_t call);
210
211 void
212 smp_init(void)
213 {
214 simple_lock_init(&mp_kdp_lock, 0);
215 simple_lock_init(&mp_rv_lock, 0);
216 lck_grp_attr_setdefault(&smp_lck_grp_attr);
217 lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
218 lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
219 lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
220 console_init();
221
222 if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
223 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
224 return;
225
226 cpu_thread_init();
227
228 GPROF_INIT();
229 DBGLOG_CPU_INIT(master_cpu);
230
231 mp_cpus_call_init();
232 mp_cpus_call_cpu_init();
233
234 if (PE_parse_boot_argn("TSC_sync_margin",
235 &TSC_sync_margin, sizeof(TSC_sync_margin))) {
236 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
237 } else if (cpuid_vmm_present()) {
238 kprintf("TSC sync margin disabled\n");
239 TSC_sync_margin = 0;
240 }
241 smp_initialized = TRUE;
242
243 cpu_prewarm_init();
244
245 return;
246 }
247
248 typedef struct {
249 int target_cpu;
250 int target_lapic;
251 int starter_cpu;
252 } processor_start_info_t;
253 static processor_start_info_t start_info __attribute__((aligned(64)));
254
255 /*
256 * Cache-alignment is to avoid cross-cpu false-sharing interference.
257 */
258 static volatile long tsc_entry_barrier __attribute__((aligned(64)));
259 static volatile long tsc_exit_barrier __attribute__((aligned(64)));
260 static volatile uint64_t tsc_target __attribute__((aligned(64)));
261
262 /*
263 * Poll a CPU to see when it has marked itself as running.
264 */
265 static void
266 mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
267 {
268 while (iters-- > 0) {
269 if (cpu_datap(slot_num)->cpu_running)
270 break;
271 delay(usecdelay);
272 }
273 }
274
275 /*
276 * Quickly bring a CPU back online which has been halted.
277 */
278 kern_return_t
279 intel_startCPU_fast(int slot_num)
280 {
281 kern_return_t rc;
282
283 /*
284 * Try to perform a fast restart
285 */
286 rc = pmCPUExitHalt(slot_num);
287 if (rc != KERN_SUCCESS)
288 /*
289 * The CPU was not eligible for a fast restart.
290 */
291 return(rc);
292
293 KERNEL_DEBUG_CONSTANT(
294 TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
295 slot_num, 0, 0, 0, 0);
296
297 /*
298 * Wait until the CPU is back online.
299 */
300 mp_disable_preemption();
301
302 /*
303 * We use short pauses (1us) for low latency. 30,000 iterations is
304 * longer than a full restart would require so it should be more
305 * than long enough.
306 */
307
308 mp_wait_for_cpu_up(slot_num, 30000, 1);
309 mp_enable_preemption();
310
311 KERNEL_DEBUG_CONSTANT(
312 TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
313 slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
314
315 /*
316 * Check to make sure that the CPU is really running. If not,
317 * go through the slow path.
318 */
319 if (cpu_datap(slot_num)->cpu_running)
320 return(KERN_SUCCESS);
321 else
322 return(KERN_FAILURE);
323 }
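/*
 * Illustrative sketch (not compiled) of the fast/slow start split described
 * above: a caller such as cpu_start() might try the fast restart first and
 * fall back to the full startup sequence if the cpu was not eligible or did
 * not come back up.  example_start_cpu is hypothetical; intel_startCPU() is
 * the slow path defined later in this file.
 */
#if 0
static kern_return_t
example_start_cpu(int slot_num)
{
	if (intel_startCPU_fast(slot_num) == KERN_SUCCESS)
		return KERN_SUCCESS;

	/* Not eligible for, or failed, a fast restart: do a full start. */
	return intel_startCPU(slot_num);
}
#endif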
324
325 static void
326 started_cpu(void)
327 {
328 /* Here on the started cpu with cpu_running set TRUE */
329
330 if (TSC_sync_margin &&
331 start_info.target_cpu == cpu_number()) {
332 /*
333 * I've just started-up, synchronize again with the starter cpu
334 * and then snap my TSC.
335 */
336 tsc_target = 0;
337 atomic_decl(&tsc_entry_barrier, 1);
338 while (tsc_entry_barrier != 0)
339 ; /* spin for starter and target at barrier */
340 tsc_target = rdtsc64();
341 atomic_decl(&tsc_exit_barrier, 1);
342 }
343 }
344
345 static void
346 start_cpu(void *arg)
347 {
348 int i = 1000;
349 processor_start_info_t *psip = (processor_start_info_t *) arg;
350
351 /* Ignore this if the current processor is not the starter */
352 if (cpu_number() != psip->starter_cpu)
353 return;
354
355 DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
356 arg, psip->target_cpu, psip->target_lapic);
357
358 KERNEL_DEBUG_CONSTANT(
359 TRACE_MP_CPU_START | DBG_FUNC_START,
360 psip->target_cpu,
361 psip->target_lapic, 0, 0, 0);
362
363 i386_start_cpu(psip->target_lapic, psip->target_cpu);
364
365 #ifdef POSTCODE_DELAY
366 /* Wait much longer if postcodes are displayed for a delay period. */
367 i *= 10000;
368 #endif
369 DBG("start_cpu(%p) about to wait for cpu %d\n",
370 arg, psip->target_cpu);
371
372 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
373
374 KERNEL_DEBUG_CONSTANT(
375 TRACE_MP_CPU_START | DBG_FUNC_END,
376 psip->target_cpu,
377 cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
378
379 if (TSC_sync_margin &&
380 cpu_datap(psip->target_cpu)->cpu_running) {
381 /*
382 * Compare the TSC from the started processor with ours.
383 * Report and log/panic if it diverges by more than
384 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
385 * can be overridden by boot-arg (with 0 meaning no checking).
386 */
387 uint64_t tsc_starter;
388 int64_t tsc_delta;
389 atomic_decl(&tsc_entry_barrier, 1);
390 while (tsc_entry_barrier != 0)
391 ; /* spin for both processors at barrier */
392 tsc_starter = rdtsc64();
393 atomic_decl(&tsc_exit_barrier, 1);
394 while (tsc_exit_barrier != 0)
395 ; /* spin for target to store its TSC */
396 tsc_delta = tsc_target - tsc_starter;
397 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
398 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
399 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
400 #if DEBUG
401 panic(
402 #else
403 printf(
404 #endif
405 "Unsynchronized TSC for cpu %d: "
406 "0x%016llx, delta 0x%llx\n",
407 psip->target_cpu, tsc_target, tsc_delta);
408 }
409 }
410 }
411
412 kern_return_t
413 intel_startCPU(
414 int slot_num)
415 {
416 int lapic = cpu_to_lapic[slot_num];
417 boolean_t istate;
418
419 assert(lapic != -1);
420
421 DBGLOG_CPU_INIT(slot_num);
422
423 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
424 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
425
426 /*
427 * Initialize (or re-initialize) the descriptor tables for this cpu.
428 * Propagate processor mode to slave.
429 */
430 if (cpu_mode_is64bit())
431 cpu_desc_init64(cpu_datap(slot_num));
432 else
433 cpu_desc_init(cpu_datap(slot_num));
434
435 /* Serialize use of the slave boot stack, etc. */
436 lck_mtx_lock(&mp_cpu_boot_lock);
437
438 istate = ml_set_interrupts_enabled(FALSE);
439 if (slot_num == get_cpu_number()) {
440 ml_set_interrupts_enabled(istate);
441 lck_mtx_unlock(&mp_cpu_boot_lock);
442 return KERN_SUCCESS;
443 }
444
445 start_info.starter_cpu = cpu_number();
446 start_info.target_cpu = slot_num;
447 start_info.target_lapic = lapic;
448 tsc_entry_barrier = 2;
449 tsc_exit_barrier = 2;
450
451 /*
452 * Perform the processor startup sequence with all running
453 * processors rendezvous'ed. This is required during periods when
454 * the cache-disable bit is set for MTRR/PAT initialization.
455 */
456 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
457
458 start_info.target_cpu = 0;
459
460 ml_set_interrupts_enabled(istate);
461 lck_mtx_unlock(&mp_cpu_boot_lock);
462
463 if (!cpu_datap(slot_num)->cpu_running) {
464 kprintf("Failed to start CPU %02d\n", slot_num);
465 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
466 delay(1000000);
467 halt_cpu();
468 return KERN_SUCCESS;
469 } else {
470 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
471 return KERN_SUCCESS;
472 }
473 }
474
475 #if MP_DEBUG
476 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
477 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
478
479 MP_EVENT_NAME_DECL();
480
481 #endif /* MP_DEBUG */
482
483 int
484 cpu_signal_handler(x86_saved_state_t *regs)
485 {
486 int my_cpu;
487 volatile int *my_word;
488
489 SCHED_STATS_IPI(current_processor());
490
491 my_cpu = cpu_number();
492 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
493 /* Store the initial set of signals for diagnostics. New
494 * signals could arrive while these are being processed
495 * so it's no more than a hint.
496 */
497
498 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
499
500 do {
501 #if MACH_KDP
502 if (i_bit(MP_KDP, my_word)) {
503 DBGLOG(cpu_handle,my_cpu,MP_KDP);
504 i_bit_clear(MP_KDP, my_word);
505 /* Ensure that the i386_kernel_state at the base of the
506 * current thread's stack (if any) is synchronized with the
507 * context at the moment of the interrupt, to facilitate
508 * access through the debugger.
509 */
510 sync_iss_to_iks(regs);
511 if (pmsafe_debug && !kdp_snapshot)
512 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
513 mp_kdp_wait(TRUE, FALSE);
514 if (pmsafe_debug && !kdp_snapshot)
515 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
516 } else
517 #endif /* MACH_KDP */
518 if (i_bit(MP_TLB_FLUSH, my_word)) {
519 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
520 i_bit_clear(MP_TLB_FLUSH, my_word);
521 pmap_update_interrupt();
522 } else if (i_bit(MP_AST, my_word)) {
523 DBGLOG(cpu_handle,my_cpu,MP_AST);
524 i_bit_clear(MP_AST, my_word);
525 ast_check(cpu_to_processor(my_cpu));
526 } else if (i_bit(MP_RENDEZVOUS, my_word)) {
527 DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
528 i_bit_clear(MP_RENDEZVOUS, my_word);
529 mp_rendezvous_action();
530 } else if (i_bit(MP_BROADCAST, my_word)) {
531 DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
532 i_bit_clear(MP_BROADCAST, my_word);
533 mp_broadcast_action();
534 } else if (i_bit(MP_CHUD, my_word)) {
535 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
536 i_bit_clear(MP_CHUD, my_word);
537 chudxnu_cpu_signal_handler();
538 } else if (i_bit(MP_CALL, my_word)) {
539 DBGLOG(cpu_handle,my_cpu,MP_CALL);
540 i_bit_clear(MP_CALL, my_word);
541 mp_cpus_call_action();
542 } else if (i_bit(MP_CALL_PM, my_word)) {
543 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
544 i_bit_clear(MP_CALL_PM, my_word);
545 mp_call_PM();
546 }
547 } while (*my_word);
548
549 return 0;
550 }
551
552 static int
553 NMIInterruptHandler(x86_saved_state_t *regs)
554 {
555 void *stackptr;
556
557 if (panic_active() && !panicDebugging) {
558 if (pmsafe_debug)
559 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
560 for(;;)
561 cpu_pause();
562 }
563
564 atomic_incl(&NMIPI_acks, 1);
565 sync_iss_to_iks_unconditionally(regs);
566 #if defined (__i386__)
567 __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
568 #elif defined (__x86_64__)
569 __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
570 #endif
571
572 if (cpu_number() == debugger_cpu)
573 goto NMExit;
574
575 if (spinlock_timed_out) {
576 char pstr[192];
577 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
578 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
579 } else if (pmap_tlb_flush_timeout == TRUE) {
580 char pstr[128];
581 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
582 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
583 }
584
585 #if MACH_KDP
586 if (pmsafe_debug && !kdp_snapshot)
587 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
588 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
589 mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
590 if (pmsafe_debug && !kdp_snapshot)
591 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
592 #endif
593 NMExit:
594 return 1;
595 }
596
597
598 /*
599 * cpu_interrupt is really just to be used by the scheduler to
600 * get a CPU's attention; it may not always issue an IPI. If an
601 * IPI is always needed, then use i386_cpu_IPI.
602 */
603 void
604 cpu_interrupt(int cpu)
605 {
606 boolean_t did_IPI = FALSE;
607
608 if (smp_initialized
609 && pmCPUExitIdle(cpu_datap(cpu))) {
610 i386_cpu_IPI(cpu);
611 did_IPI = TRUE;
612 }
613
614 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
615 }
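/*
 * Minimal sketch (not compiled) of the distinction described above:
 * cpu_interrupt() is a scheduler-style nudge that may be satisfied without
 * an IPI, while i386_cpu_IPI() always sends one.  example_poke_cpu is
 * hypothetical.
 */
#if 0
static void
example_poke_cpu(int cpu, boolean_t need_ipi)
{
	if (need_ipi)
		i386_cpu_IPI(cpu);	/* delivery of an IPI is required */
	else
		cpu_interrupt(cpu);	/* a nudge; the IPI may be elided */
}
#endif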
616
617 /*
618 * Send a true NMI via the local APIC to the specified CPU.
619 */
620 void
621 cpu_NMI_interrupt(int cpu)
622 {
623 if (smp_initialized) {
624 i386_send_NMI(cpu);
625 }
626 }
627
628 static void (* volatile mp_PM_func)(void) = NULL;
629
630 static void
631 mp_call_PM(void)
632 {
633 assert(!ml_get_interrupts_enabled());
634
635 if (mp_PM_func != NULL)
636 mp_PM_func();
637 }
638
639 void
640 cpu_PM_interrupt(int cpu)
641 {
642 assert(!ml_get_interrupts_enabled());
643
644 if (mp_PM_func != NULL) {
645 if (cpu == cpu_number())
646 mp_PM_func();
647 else
648 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
649 }
650 }
651
652 void
653 PM_interrupt_register(void (*fn)(void))
654 {
655 mp_PM_func = fn;
656 }
657
658 void
659 i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
660 {
661 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
662 uint64_t tsc_timeout;
663
664
665 if (!cpu_datap(cpu)->cpu_running)
666 return;
667
668 if (event == MP_TLB_FLUSH)
669 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
670
671 DBGLOG(cpu_signal, cpu, event);
672
673 i_bit_set(event, signals);
674 i386_cpu_IPI(cpu);
675 if (mode == SYNC) {
676 again:
677 tsc_timeout = rdtsc64() + (1000*1000*1000);
678 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
679 cpu_pause();
680 }
681 if (i_bit(event, signals)) {
682 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
683 cpu, event);
684 goto again;
685 }
686 }
687 if (event == MP_TLB_FLUSH)
688 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
689 }
690
691 /*
692 * Send event to all running cpus.
693 * Called with the topology locked.
694 */
695 void
696 i386_signal_cpus(mp_event_t event, mp_sync_t mode)
697 {
698 unsigned int cpu;
699 unsigned int my_cpu = cpu_number();
700
701 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
702
703 for (cpu = 0; cpu < real_ncpus; cpu++) {
704 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
705 continue;
706 i386_signal_cpu(cpu, event, mode);
707 }
708 }
709
710 /*
711 * Return the number of running cpus.
712 * Called with the topology locked.
713 */
714 int
715 i386_active_cpus(void)
716 {
717 unsigned int cpu;
718 unsigned int ncpus = 0;
719
720 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
721
722 for (cpu = 0; cpu < real_ncpus; cpu++) {
723 if (cpu_datap(cpu)->cpu_running)
724 ncpus++;
725 }
726 return(ncpus);
727 }
728
729 /*
730 * All-CPU rendezvous:
731 * - CPUs are signalled,
732 * - all execute the setup function (if specified),
733 * - rendezvous (i.e. all cpus reach a barrier),
734 * - all execute the action function (if specified),
735 * - rendezvous again,
736 * - execute the teardown function (if specified), and then
737 * - resume.
738 *
739 * Note that the supplied external functions _must_ be reentrant and aware
740 * that they are running in parallel and in an unknown lock context.
741 */
742
743 static void
744 mp_rendezvous_action(void)
745 {
746 boolean_t intrs_enabled;
747
748 /* setup function */
749 if (mp_rv_setup_func != NULL)
750 mp_rv_setup_func(mp_rv_func_arg);
751
752 intrs_enabled = ml_get_interrupts_enabled();
753
754 /* spin on entry rendezvous */
755 atomic_incl(&mp_rv_entry, 1);
756 while (mp_rv_entry < mp_rv_ncpus) {
757 /* poll for pesky tlb flushes if interrupts disabled */
758 if (!intrs_enabled)
759 handle_pending_TLB_flushes();
760 cpu_pause();
761 }
762
763 /* action function */
764 if (mp_rv_action_func != NULL)
765 mp_rv_action_func(mp_rv_func_arg);
766
767 /* spin on exit rendezvous */
768 atomic_incl(&mp_rv_exit, 1);
769 while (mp_rv_exit < mp_rv_ncpus) {
770 if (!intrs_enabled)
771 handle_pending_TLB_flushes();
772 cpu_pause();
773 }
774
775 /* teardown function */
776 if (mp_rv_teardown_func != NULL)
777 mp_rv_teardown_func(mp_rv_func_arg);
778
779 /* Bump completion count */
780 atomic_incl(&mp_rv_complete, 1);
781 }
782
783 void
784 mp_rendezvous(void (*setup_func)(void *),
785 void (*action_func)(void *),
786 void (*teardown_func)(void *),
787 void *arg)
788 {
789
790 if (!smp_initialized) {
791 if (setup_func != NULL)
792 setup_func(arg);
793 if (action_func != NULL)
794 action_func(arg);
795 if (teardown_func != NULL)
796 teardown_func(arg);
797 return;
798 }
799
800 /* obtain rendezvous lock */
801 simple_lock(&mp_rv_lock);
802
803 /* set static function pointers */
804 mp_rv_setup_func = setup_func;
805 mp_rv_action_func = action_func;
806 mp_rv_teardown_func = teardown_func;
807 mp_rv_func_arg = arg;
808
809 mp_rv_entry = 0;
810 mp_rv_exit = 0;
811 mp_rv_complete = 0;
812
813 /*
814 * signal other processors, which will call mp_rendezvous_action()
815 * with interrupts disabled
816 */
817 simple_lock(&x86_topo_lock);
818 mp_rv_ncpus = i386_active_cpus();
819 i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
820 simple_unlock(&x86_topo_lock);
821
822 /* call executor function on this cpu */
823 mp_rendezvous_action();
824
825 /*
826 * Spin for everyone to complete.
827 * This is necessary to ensure that all processors have proceeded
828 * from the exit barrier before we release the rendezvous structure.
829 */
830 while (mp_rv_complete < mp_rv_ncpus) {
831 cpu_pause();
832 }
833
834 /* Tidy up */
835 mp_rv_setup_func = NULL;
836 mp_rv_action_func = NULL;
837 mp_rv_teardown_func = NULL;
838 mp_rv_func_arg = NULL;
839
840 /* release lock */
841 simple_unlock(&mp_rv_lock);
842 }
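/*
 * Illustrative sketch (not compiled) of the protocol described above.
 * Each of the three example_* routines (all hypothetical) is executed on
 * every active cpu: setup before the entry barrier, action between the
 * entry and exit barriers, teardown after the exit barrier.  All of them
 * must be reentrant and must not rely on any particular lock state.
 */
#if 0
static volatile long example_generation;

static void
example_setup(__unused void *arg)
{
	/* Runs on each cpu before any cpu starts the action. */
}

static void
example_action(__unused void *arg)
{
	/* Runs on each cpu only after all cpus have completed setup. */
	atomic_incl(&example_generation, 1);
}

static void
example_teardown(__unused void *arg)
{
	/* Runs on each cpu only after all cpus have completed the action. */
}

static void
example_rendezvous(void)
{
	mp_rendezvous(example_setup, example_action, example_teardown, NULL);
}
#endif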
843
844 void
845 mp_rendezvous_break_lock(void)
846 {
847 simple_lock_init(&mp_rv_lock, 0);
848 }
849
850 static void
851 setup_disable_intrs(__unused void * param_not_used)
852 {
853 /* disable interrupts before the first barrier */
854 boolean_t intr = ml_set_interrupts_enabled(FALSE);
855
856 current_cpu_datap()->cpu_iflag = intr;
857 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
858 }
859
860 static void
861 teardown_restore_intrs(__unused void * param_not_used)
862 {
863 /* restore interrupt flag following MTRR changes */
864 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
865 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
866 }
867
868 /*
869 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
870 * This is exported for use by kexts.
871 */
872 void
873 mp_rendezvous_no_intrs(
874 void (*action_func)(void *),
875 void *arg)
876 {
877 mp_rendezvous(setup_disable_intrs,
878 action_func,
879 teardown_restore_intrs,
880 arg);
881 }
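/*
 * Illustrative sketch (not compiled) of how a kext might use the wrapper
 * above to program an MSR identically on every active cpu.  The action runs
 * with interrupts disabled on each cpu, so it must be short and must not
 * block.  example_msr_arg_t, example_wrmsr_action and example_wrmsr_all are
 * hypothetical; wrmsr64() is assumed to be available from <i386/proc_reg.h>.
 */
#if 0
typedef struct {
	uint32_t	msr;
	uint64_t	value;
} example_msr_arg_t;

static void
example_wrmsr_action(void *arg)
{
	example_msr_arg_t	*map = (example_msr_arg_t *) arg;

	wrmsr64(map->msr, map->value);	/* executed once on each active cpu */
}

static void
example_wrmsr_all(uint32_t msr, uint64_t value)
{
	example_msr_arg_t	arg = { .msr = msr, .value = value };

	mp_rendezvous_no_intrs(example_wrmsr_action, &arg);
}
#endif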
882
883
884 typedef struct {
885 queue_chain_t link; /* queue linkage */
886 void (*func)(void *,void *); /* routine to call */
887 void *arg0; /* routine's 1st arg */
888 void *arg1; /* routine's 2nd arg */
889 volatile long *countp; /* completion counter */
890 } mp_call_t;
891
892
893 typedef struct {
894 queue_head_t queue;
895 decl_simple_lock_data(, lock);
896 } mp_call_queue_t;
897 #define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
898 static mp_call_queue_t mp_cpus_call_freelist;
899 static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
900
901 static inline boolean_t
902 mp_call_head_lock(mp_call_queue_t *cqp)
903 {
904 boolean_t intrs_enabled;
905
906 intrs_enabled = ml_set_interrupts_enabled(FALSE);
907 simple_lock(&cqp->lock);
908
909 return intrs_enabled;
910 }
911
912 static inline boolean_t
913 mp_call_head_is_locked(mp_call_queue_t *cqp)
914 {
915 return !ml_get_interrupts_enabled() &&
916 hw_lock_held((hw_lock_t)&cqp->lock);
917 }
918
919 static inline void
920 mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
921 {
922 simple_unlock(&cqp->lock);
923 ml_set_interrupts_enabled(intrs_enabled);
924 }
925
926 static inline mp_call_t *
927 mp_call_alloc(void)
928 {
929 mp_call_t *callp = NULL;
930 boolean_t intrs_enabled;
931 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
932
933 intrs_enabled = mp_call_head_lock(cqp);
934 if (!queue_empty(&cqp->queue))
935 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
936 mp_call_head_unlock(cqp, intrs_enabled);
937
938 return callp;
939 }
940
941 static inline void
942 mp_call_free(mp_call_t *callp)
943 {
944 boolean_t intrs_enabled;
945 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
946
947 intrs_enabled = mp_call_head_lock(cqp);
948 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
949 mp_call_head_unlock(cqp, intrs_enabled);
950 }
951
952 static inline mp_call_t *
953 mp_call_dequeue_locked(mp_call_queue_t *cqp)
954 {
955 mp_call_t *callp = NULL;
956
957 assert(mp_call_head_is_locked(cqp));
958 if (!queue_empty(&cqp->queue))
959 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
960 return callp;
961 }
962
963 static inline void
964 mp_call_enqueue_locked(
965 mp_call_queue_t *cqp,
966 mp_call_t *callp)
967 {
968 queue_enter(&cqp->queue, callp, typeof(callp), link);
969 }
970
971 /* Called on the boot processor to initialize global structures */
972 static void
973 mp_cpus_call_init(void)
974 {
975 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
976
977 DBG("mp_cpus_call_init()\n");
978 simple_lock_init(&cqp->lock, 0);
979 queue_init(&cqp->queue);
980 }
981
982 /*
983 * Called by each processor to add call buffers to the free list
984 * and to initialize the per-cpu call queue.
985 * Also called but ignored on slave processors on re-start/wake.
986 */
987 static void
988 mp_cpus_call_cpu_init(void)
989 {
990 int i;
991 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu_number()];
992 mp_call_t *callp;
993
994 if (cqp->queue.next != NULL)
995 return; /* restart/wake case: called already */
996
997 simple_lock_init(&cqp->lock, 0);
998 queue_init(&cqp->queue);
999 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1000 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
1001 mp_call_free(callp);
1002 }
1003
1004 DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
1005 }
1006
1007 /*
1008 * This is called from cpu_signal_handler() to process an MP_CALL signal,
1009 * and also from i386_deactivate_cpu() when a cpu is being taken offline.
1010 */
1011 static void
1012 mp_cpus_call_action(void)
1013 {
1014 mp_call_queue_t *cqp;
1015 boolean_t intrs_enabled;
1016 mp_call_t *callp;
1017 mp_call_t call;
1018
1019 assert(!ml_get_interrupts_enabled());
1020 cqp = &mp_cpus_call_head[cpu_number()];
1021 intrs_enabled = mp_call_head_lock(cqp);
1022 while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
1023 /* Copy call request to the stack to free buffer */
1024 call = *callp;
1025 mp_call_free(callp);
1026 if (call.func != NULL) {
1027 mp_call_head_unlock(cqp, intrs_enabled);
1028 KERNEL_DEBUG_CONSTANT(
1029 TRACE_MP_CPUS_CALL_ACTION,
1030 call.func, call.arg0, call.arg1, call.countp, 0);
1031 call.func(call.arg0, call.arg1);
1032 (void) mp_call_head_lock(cqp);
1033 }
1034 if (call.countp != NULL)
1035 atomic_incl(call.countp, 1);
1036 }
1037 mp_call_head_unlock(cqp, intrs_enabled);
1038 }
1039
1040 /*
1041 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1042 * Possible modes are:
1043 * SYNC: function is called serially on target cpus in logical cpu order
1044 * waiting for each call to be acknowledged before proceeding
1045 * ASYNC: function call is queued to the specified cpus
1046 * waiting for all calls to complete in parallel before returning
1047 * NOSYNC: function calls are queued
1048 * but we return before confirmation of calls completing.
1049 * The action function may be NULL.
1050 * The cpu mask may include the local cpu. Offline cpus are ignored.
1051 * The return value is the number of cpus on which the call was made or queued.
1052 */
1053 cpu_t
1054 mp_cpus_call(
1055 cpumask_t cpus,
1056 mp_sync_t mode,
1057 void (*action_func)(void *),
1058 void *arg)
1059 {
1060 return mp_cpus_call1(
1061 cpus,
1062 mode,
1063 (void (*)(void *,void *))action_func,
1064 arg,
1065 NULL,
1066 NULL,
1067 NULL);
1068 }
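/*
 * Illustrative sketch (not compiled) of the interface described above:
 * bump a counter on a single target cpu and wait for it to complete.
 * example_count_pokes, example_poke_counter and example_poke_one_cpu are
 * hypothetical.
 */
#if 0
static volatile long example_poke_counter;

static void
example_count_pokes(__unused void *arg)
{
	atomic_incl(&example_poke_counter, 1);
}

static cpu_t
example_poke_one_cpu(cpu_t target)
{
	/*
	 * SYNC queues the call, IPIs the target and spins until the call
	 * is acknowledged.  The return value is 1 if the call was made
	 * (or run locally when target is this cpu), 0 if it was offline.
	 */
	return mp_cpus_call(cpu_to_cpumask(target), SYNC,
			    example_count_pokes, NULL);
}
#endif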
1069
1070 static void
1071 mp_cpus_call_wait(boolean_t intrs_enabled,
1072 long mp_cpus_signals,
1073 volatile long *mp_cpus_calls)
1074 {
1075 mp_call_queue_t *cqp;
1076
1077 cqp = &mp_cpus_call_head[cpu_number()];
1078
1079 while (*mp_cpus_calls < mp_cpus_signals) {
1080 if (!intrs_enabled) {
1081 /* Sniffing w/o locking */
1082 if (!queue_empty(&cqp->queue))
1083 mp_cpus_call_action();
1084 handle_pending_TLB_flushes();
1085 }
1086 cpu_pause();
1087 }
1088 }
1089
1090 cpu_t
1091 mp_cpus_call1(
1092 cpumask_t cpus,
1093 mp_sync_t mode,
1094 void (*action_func)(void *, void *),
1095 void *arg0,
1096 void *arg1,
1097 cpumask_t *cpus_calledp,
1098 cpumask_t *cpus_notcalledp)
1099 {
1100 cpu_t cpu;
1101 boolean_t intrs_enabled = FALSE;
1102 boolean_t call_self = FALSE;
1103 cpumask_t cpus_called = 0;
1104 cpumask_t cpus_notcalled = 0;
1105 long mp_cpus_signals = 0;
1106 volatile long mp_cpus_calls = 0;
1107
1108 KERNEL_DEBUG_CONSTANT(
1109 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
1110 cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
1111
1112 if (!smp_initialized) {
1113 if ((cpus & CPUMASK_SELF) == 0)
1114 goto out;
1115 if (action_func != NULL) {
1116 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1117 action_func(arg0, arg1);
1118 ml_set_interrupts_enabled(intrs_enabled);
1119 }
1120 call_self = TRUE;
1121 goto out;
1122 }
1123
1124 /*
1125 * Queue the call for each non-local requested cpu.
1126 * The topo lock is not taken. Instead we sniff the cpu_running state
1127 * and then re-check it after taking the call lock. A cpu being taken
1128 * offline runs the action function after clearing its cpu_running flag.
1129 */
1130 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1131 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1132 !cpu_datap(cpu)->cpu_running)
1133 continue;
1134 if (cpu == (cpu_t) cpu_number()) {
1135 /*
1136 * We don't IPI ourself and if calling asynchronously,
1137 * we defer our call until we have signalled all others.
1138 */
1139 call_self = TRUE;
1140 cpus_called |= cpu_to_cpumask(cpu);
1141 if (mode == SYNC && action_func != NULL) {
1142 KERNEL_DEBUG_CONSTANT(
1143 TRACE_MP_CPUS_CALL_LOCAL,
1144 VM_KERNEL_UNSLIDE(action_func),
1145 arg0, arg1, 0, 0);
1146 action_func(arg0, arg1);
1147 }
1148 } else {
1149 /*
1150 * Here to queue a call to cpu and IPI.
1151 * Spinning for request buffer unless NOSYNC.
1152 */
1153 mp_call_t *callp = NULL;
1154 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1155
1156 queue_call:
1157 if (callp == NULL)
1158 callp = mp_call_alloc();
1159 intrs_enabled = mp_call_head_lock(cqp);
1160 if (!cpu_datap(cpu)->cpu_running) {
1161 mp_call_head_unlock(cqp, intrs_enabled);
1162 continue;
1163 }
1164 if (mode == NOSYNC) {
1165 if (callp == NULL) {
1166 cpus_notcalled |= cpu_to_cpumask(cpu);
1167 mp_call_head_unlock(cqp, intrs_enabled);
1168 KERNEL_DEBUG_CONSTANT(
1169 TRACE_MP_CPUS_CALL_NOBUF,
1170 cpu, 0, 0, 0, 0);
1171 continue;
1172 }
1173 callp->countp = NULL;
1174 } else {
1175 if (callp == NULL) {
1176 mp_call_head_unlock(cqp, intrs_enabled);
1177 KERNEL_DEBUG_CONSTANT(
1178 TRACE_MP_CPUS_CALL_NOBUF,
1179 cpu, 0, 0, 0, 0);
1180 if (!intrs_enabled) {
1181 /* Sniffing w/o locking */
1182 if (!queue_empty(&cqp->queue))
1183 mp_cpus_call_action();
1184 handle_pending_TLB_flushes();
1185 }
1186 cpu_pause();
1187 goto queue_call;
1188 }
1189 callp->countp = &mp_cpus_calls;
1190 }
1191 callp->func = action_func;
1192 callp->arg0 = arg0;
1193 callp->arg1 = arg1;
1194 mp_call_enqueue_locked(cqp, callp);
1195 mp_cpus_signals++;
1196 cpus_called |= cpu_to_cpumask(cpu);
1197 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1198 mp_call_head_unlock(cqp, intrs_enabled);
1199 if (mode == SYNC) {
1200 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1201 }
1202 }
1203 }
1204
1205 /* Call locally if mode not SYNC */
1206 if (mode != SYNC && call_self ) {
1207 KERNEL_DEBUG_CONSTANT(
1208 TRACE_MP_CPUS_CALL_LOCAL,
1209 VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
1210 if (action_func != NULL) {
1211 ml_set_interrupts_enabled(FALSE);
1212 action_func(arg0, arg1);
1213 ml_set_interrupts_enabled(intrs_enabled);
1214 }
1215 }
1216
1217 /* For ASYNC, now wait for all signaled cpus to complete their calls */
1218 if (mode == ASYNC) {
1219 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1220 }
1221
1222 out:
1223 cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);
1224
1225 if (cpus_calledp)
1226 *cpus_calledp = cpus_called;
1227 if (cpus_notcalledp)
1228 *cpus_notcalledp = cpus_notcalled;
1229
1230 KERNEL_DEBUG_CONSTANT(
1231 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
1232 cpu, cpus_called, cpus_notcalled, 0, 0);
1233
1234 return cpu;
1235 }
1236
1237
1238 static void
1239 mp_broadcast_action(void)
1240 {
1241 /* call action function */
1242 if (mp_bc_action_func != NULL)
1243 mp_bc_action_func(mp_bc_func_arg);
1244
1245 /* if we're the last one through, wake up the instigator */
1246 if (atomic_decl_and_test(&mp_bc_count, 1))
1247 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1248 }
1249
1250 /*
1251 * mp_broadcast() runs a given function on all active cpus.
1252 * The caller blocks until the function has run on all cpus.
1253 * The caller will also block if there is another pending broadcast.
1254 */
1255 void
1256 mp_broadcast(
1257 void (*action_func)(void *),
1258 void *arg)
1259 {
1260 if (!smp_initialized) {
1261 if (action_func != NULL)
1262 action_func(arg);
1263 return;
1264 }
1265
1266 /* obtain broadcast lock */
1267 lck_mtx_lock(&mp_bc_lock);
1268
1269 /* set static function pointers */
1270 mp_bc_action_func = action_func;
1271 mp_bc_func_arg = arg;
1272
1273 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
1274
1275 /*
1276 * signal other processors, which will call mp_broadcast_action()
1277 */
1278 simple_lock(&x86_topo_lock);
1279 mp_bc_ncpus = i386_active_cpus(); /* total including this cpu */
1280 mp_bc_count = mp_bc_ncpus;
1281 i386_signal_cpus(MP_BROADCAST, ASYNC);
1282
1283 /* call executor function on this cpu */
1284 mp_broadcast_action();
1285 simple_unlock(&x86_topo_lock);
1286
1287 /* block for all cpus to have run action_func */
1288 if (mp_bc_ncpus > 1)
1289 thread_block(THREAD_CONTINUE_NULL);
1290 else
1291 clear_wait(current_thread(), THREAD_AWAKENED);
1292
1293 /* release lock */
1294 lck_mtx_unlock(&mp_bc_lock);
1295 }
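/*
 * Illustrative sketch (not compiled): mp_broadcast() runs the action on
 * every active cpu, including the caller's, and blocks the calling thread
 * until all of them have finished, so it may only be used from a context
 * that is allowed to block.  example_drain_state and
 * example_broadcast_drain are hypothetical.
 */
#if 0
static void
example_drain_state(__unused void *arg)
{
	/* Executed once on each active cpu. */
}

static void
example_broadcast_drain(void)
{
	mp_broadcast(example_drain_state, NULL);
}
#endif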
1296
1297 void
1298 i386_activate_cpu(void)
1299 {
1300 cpu_data_t *cdp = current_cpu_datap();
1301
1302 assert(!ml_get_interrupts_enabled());
1303
1304 if (!smp_initialized) {
1305 cdp->cpu_running = TRUE;
1306 return;
1307 }
1308
1309 simple_lock(&x86_topo_lock);
1310 cdp->cpu_running = TRUE;
1311 started_cpu();
1312 simple_unlock(&x86_topo_lock);
1313 flush_tlb_raw();
1314 }
1315
1316 extern void etimer_timer_expire(void *arg);
1317
1318 void
1319 i386_deactivate_cpu(void)
1320 {
1321 cpu_data_t *cdp = current_cpu_datap();
1322
1323 assert(!ml_get_interrupts_enabled());
1324
1325 KERNEL_DEBUG_CONSTANT(
1326 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
1327 0, 0, 0, 0, 0);
1328
1329 simple_lock(&x86_topo_lock);
1330 cdp->cpu_running = FALSE;
1331 simple_unlock(&x86_topo_lock);
1332
1333 /*
1334 * Move all of this cpu's timers to the master/boot cpu,
1335 * and poke it in case there's a sooner deadline for it to schedule.
1336 */
1337 timer_queue_shutdown(&cdp->rtclock_timer.queue);
1338 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);
1339
1340 /*
1341 * Open an interrupt window
1342 * and ensure any pending IPI or timer is serviced
1343 */
1344 mp_disable_preemption();
1345 ml_set_interrupts_enabled(TRUE);
1346
1347 while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
1348 cpu_pause();
1349 /*
1350 * Ensure there's no remaining timer deadline set
1351 * - AICPM may have left one active.
1352 */
1353 setPop(0);
1354
1355 ml_set_interrupts_enabled(FALSE);
1356 mp_enable_preemption();
1357
1358 KERNEL_DEBUG_CONSTANT(
1359 TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
1360 0, 0, 0, 0, 0);
1361 }
1362
1363 int pmsafe_debug = 1;
1364
1365 #if MACH_KDP
1366 volatile boolean_t mp_kdp_trap = FALSE;
1367 volatile unsigned long mp_kdp_ncpus;
1368 boolean_t mp_kdp_state;
1369
1370
1371 void
1372 mp_kdp_enter(void)
1373 {
1374 unsigned int cpu;
1375 unsigned int ncpus = 0;
1376 unsigned int my_cpu;
1377 uint64_t tsc_timeout;
1378
1379 DBG("mp_kdp_enter()\n");
1380
1381 /*
1382 * Here to enter the debugger.
1383 * In case of races, only one cpu is allowed to enter kdp after
1384 * stopping others.
1385 */
1386 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1387 my_cpu = cpu_number();
1388
1389 if (my_cpu == (unsigned) debugger_cpu) {
1390 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1391 kdp_reset();
1392 return;
1393 }
1394
1395 cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1396 simple_lock(&mp_kdp_lock);
1397
1398 if (pmsafe_debug && !kdp_snapshot)
1399 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1400
1401 while (mp_kdp_trap) {
1402 simple_unlock(&mp_kdp_lock);
1403 DBG("mp_kdp_enter() race lost\n");
1404 #if MACH_KDP
1405 mp_kdp_wait(TRUE, FALSE);
1406 #endif
1407 simple_lock(&mp_kdp_lock);
1408 }
1409 debugger_cpu = my_cpu;
1410 ncpus = 1;
1411 mp_kdp_ncpus = 1; /* self */
1412 mp_kdp_trap = TRUE;
1413 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1414 simple_unlock(&mp_kdp_lock);
1415
1416 /*
1417 * Deliver a nudge to other cpus, counting how many
1418 */
1419 DBG("mp_kdp_enter() signaling other processors\n");
1420 if (force_immediate_debugger_NMI == FALSE) {
1421 for (cpu = 0; cpu < real_ncpus; cpu++) {
1422 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1423 continue;
1424 ncpus++;
1425 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1426 }
1427 /*
1428 * Wait for other processors to synchronize
1429 */
1430 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1431
1432 /*
1433 * This timeout is rather arbitrary; we don't want to NMI
1434 * processors that are executing at potentially
1435 * "unsafe-to-interrupt" points such as the trampolines,
1436 * but neither do we want to lose state by waiting too long.
1437 */
1438 tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);
1439
1440 if (virtualized)
1441 tsc_timeout = ~0ULL;
1442
1443 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1444 /*
1445 * A TLB shootdown request may be pending--this would
1446 * result in the requesting processor waiting in
1447 * PMAP_UPDATE_TLBS() until this processor deals with it.
1448 * Process it, so it can now enter mp_kdp_wait()
1449 */
1450 handle_pending_TLB_flushes();
1451 cpu_pause();
1452 }
1453 /* If we've timed out, and some processor(s) are still unresponsive,
1454 * interrupt them with an NMI via the local APIC.
1455 */
1456 if (mp_kdp_ncpus != ncpus) {
1457 for (cpu = 0; cpu < real_ncpus; cpu++) {
1458 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1459 continue;
1460 if (cpu_signal_pending(cpu, MP_KDP))
1461 cpu_NMI_interrupt(cpu);
1462 }
1463 }
1464 }
1465 else
1466 for (cpu = 0; cpu < real_ncpus; cpu++) {
1467 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1468 continue;
1469 cpu_NMI_interrupt(cpu);
1470 }
1471
1472 DBG("mp_kdp_enter() %d processors done %s\n",
1473 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1474
1475 postcode(MP_KDP_ENTER);
1476 }
1477
1478 static boolean_t
1479 cpu_signal_pending(int cpu, mp_event_t event)
1480 {
1481 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1482 boolean_t retval = FALSE;
1483
1484 if (i_bit(event, signals))
1485 retval = TRUE;
1486 return retval;
1487 }
1488
1489 long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1490 void *arg0, void *arg1)
1491 {
1492 if (lcpu > (real_ncpus - 1))
1493 return -1;
1494
1495 if (func == NULL)
1496 return -1;
1497
1498 kdp_xcpu_call_func.func = func;
1499 kdp_xcpu_call_func.ret = -1;
1500 kdp_xcpu_call_func.arg0 = arg0;
1501 kdp_xcpu_call_func.arg1 = arg1;
1502 kdp_xcpu_call_func.cpu = lcpu;
1503 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1504 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1505 cpu_pause();
1506 return kdp_xcpu_call_func.ret;
1507 }
1508
1509 static void
1510 kdp_x86_xcpu_poll(void)
1511 {
1512 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1513 kdp_xcpu_call_func.ret =
1514 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1515 kdp_xcpu_call_func.arg1,
1516 cpu_number());
1517 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1518 }
1519 }
1520
1521 static void
1522 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
1523 {
1524 DBG("mp_kdp_wait()\n");
1525 /* If an I/O port has been specified as a debugging aid, issue a read */
1526 panic_io_port_read();
1527 current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
1528 #if CONFIG_MCA
1529 /* If we've trapped due to a machine-check, save MCA registers */
1530 mca_check_save();
1531 #endif
1532
1533 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1534 while (mp_kdp_trap || (isNMI == TRUE)) {
1535 /*
1536 * A TLB shootdown request may be pending--this would result
1537 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1538 * until this processor handles it.
1539 * Process it, so it can now enter mp_kdp_wait()
1540 */
1541 if (flush)
1542 handle_pending_TLB_flushes();
1543
1544 kdp_x86_xcpu_poll();
1545 cpu_pause();
1546 }
1547
1548 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1549 DBG("mp_kdp_wait() done\n");
1550 }
1551
1552 void
1553 mp_kdp_exit(void)
1554 {
1555 DBG("mp_kdp_exit()\n");
1556 debugger_cpu = -1;
1557 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1558
1559 debugger_exit_time = mach_absolute_time();
1560
1561 mp_kdp_trap = FALSE;
1562 __asm__ volatile("mfence");
1563
1564 /* Wait for other processors to stop spinning. XXX needs timeout */
1565 DBG("mp_kdp_exit() waiting for processors to resume\n");
1566 while (mp_kdp_ncpus > 0) {
1567 /*
1568 * a TLB shootdown request may be pending... this would result in the requesting
1569 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1570 * Process it, so it can now enter mp_kdp_wait()
1571 */
1572 handle_pending_TLB_flushes();
1573
1574 cpu_pause();
1575 }
1576
1577 if (pmsafe_debug && !kdp_snapshot)
1578 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1579
1580 debugger_exit_time = mach_absolute_time();
1581
1582 DBG("mp_kdp_exit() done\n");
1583 (void) ml_set_interrupts_enabled(mp_kdp_state);
1584 postcode(0);
1585 }
1586 #endif /* MACH_KDP */
1587
1588 boolean_t
1589 mp_recent_debugger_activity() {
1590 uint64_t abstime = mach_absolute_time();
1591 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1592 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
1593 }
1594
1595 /*ARGSUSED*/
1596 void
1597 init_ast_check(
1598 __unused processor_t processor)
1599 {
1600 }
1601
1602 void
1603 cause_ast_check(
1604 processor_t processor)
1605 {
1606 int cpu = processor->cpu_id;
1607
1608 if (cpu != cpu_number()) {
1609 i386_signal_cpu(cpu, MP_AST, ASYNC);
1610 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1611 }
1612 }
1613
1614 void
1615 slave_machine_init(void *param)
1616 {
1617 /*
1618 * Here in process context, but with interrupts disabled.
1619 */
1620 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1621
1622 if (param == FULL_SLAVE_INIT) {
1623 /*
1624 * Cold start
1625 */
1626 clock_init();
1627 cpu_machine_init(); /* Interrupts enabled hereafter */
1628 mp_cpus_call_cpu_init();
1629 } else {
1630 cpu_machine_init(); /* Interrupts enabled hereafter */
1631 }
1632 }
1633
1634 #undef cpu_number
1635 int cpu_number(void)
1636 {
1637 return get_cpu_number();
1638 }
1639
1640 static void
1641 cpu_prewarm_init()
1642 {
1643 int i;
1644
1645 simple_lock_init(&cpu_warm_lock, 0);
1646 queue_init(&cpu_warm_call_list);
1647 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1648 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1649 }
1650 }
1651
1652 static timer_call_t
1653 grab_warm_timer_call()
1654 {
1655 spl_t x;
1656 timer_call_t call = NULL;
1657
1658 x = splsched();
1659 simple_lock(&cpu_warm_lock);
1660 if (!queue_empty(&cpu_warm_call_list)) {
1661 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1662 }
1663 simple_unlock(&cpu_warm_lock);
1664 splx(x);
1665
1666 return call;
1667 }
1668
1669 static void
1670 free_warm_timer_call(timer_call_t call)
1671 {
1672 spl_t x;
1673
1674 x = splsched();
1675 simple_lock(&cpu_warm_lock);
1676 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1677 simple_unlock(&cpu_warm_lock);
1678 splx(x);
1679 }
1680
1681 /*
1682 * Runs in timer call context (interrupts disabled).
1683 */
1684 static void
1685 cpu_warm_timer_call_func(
1686 call_entry_param_t p0,
1687 __unused call_entry_param_t p1)
1688 {
1689 free_warm_timer_call((timer_call_t)p0);
1690 return;
1691 }
1692
1693 /*
1694 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1695 */
1696 static void
1697 _cpu_warm_setup(
1698 void *arg)
1699 {
1700 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1701
1702 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL);
1703 cwdp->cwd_result = 0;
1704
1705 return;
1706 }
1707
1708 /*
1709 * Not safe to call with interrupts disabled.
1710 */
1711 kern_return_t
1712 ml_interrupt_prewarm(
1713 uint64_t deadline)
1714 {
1715 struct cpu_warm_data cwd;
1716 timer_call_t call;
1717 cpu_t ct;
1718
1719 if (ml_get_interrupts_enabled() == FALSE) {
1720 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1721 }
1722
1723 /*
1724 * If the platform doesn't need our help, say that we succeeded.
1725 */
1726 if (!ml_get_interrupt_prewake_applicable()) {
1727 return KERN_SUCCESS;
1728 }
1729
1730 /*
1731 * Grab a timer call to use.
1732 */
1733 call = grab_warm_timer_call();
1734 if (call == NULL) {
1735 return KERN_RESOURCE_SHORTAGE;
1736 }
1737
1738 timer_call_setup(call, cpu_warm_timer_call_func, call);
1739 cwd.cwd_call = call;
1740 cwd.cwd_deadline = deadline;
1741 cwd.cwd_result = 0;
1742
1743 /*
1744 * For now, non-local interrupts happen on the master processor.
1745 */
1746 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1747 if (ct == 0) {
1748 free_warm_timer_call(call);
1749 return KERN_FAILURE;
1750 } else {
1751 return cwd.cwd_result;
1752 }
1753 }
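/*
 * Illustrative sketch (not compiled): arming an interrupt prewarm a few
 * milliseconds ahead of an expected wakeup.  The caller and the 10ms figure
 * are hypothetical; the deadline is an absolute (mach) time, and
 * ml_interrupt_prewarm() must be called with interrupts enabled.
 */
#if 0
static kern_return_t
example_prewarm_soon(void)
{
	uint64_t	interval;
	uint64_t	deadline;

	nanoseconds_to_absolutetime(10 * 1000 * 1000ULL, &interval);	/* 10ms */
	clock_absolutetime_interval_to_deadline(interval, &deadline);

	return ml_interrupt_prewarm(deadline);
}
#endif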