[apple/xnu.git] / osfmk / i386 / mp.c
1 /*
2 *
3 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 *
5 * This file contains Original Code and/or Modifications of Original Code
6 * as defined in and that are subject to the Apple Public Source License
7 * Version 2.0 (the 'License'). You may not use this file except in
8 * compliance with the License. The rights granted to you under the License
9 * may not be used to create, or enable the creation or redistribution of,
10 * unlawful or unlicensed copies of an Apple operating system, or to
11 * circumvent, violate, or enable the circumvention or violation of, any
12 * terms of an Apple operating system software license agreement.
13 *
14 * Please obtain a copy of the License at
15 * http://www.opensource.apple.com/apsl/ and read it before using this file.
16 *
17 * The Original Code and all software distributed under the License are
18 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 * Please see the License for the specific language governing rights and
23 * limitations under the License.
24 *
25 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 */
27 /*
28 * @OSF_COPYRIGHT@
29 */
30
31 #include <mach_rt.h>
32 #include <mach_kdb.h>
33 #include <mach_kdp.h>
34 #include <mach_ldebug.h>
35 #include <gprof.h>
36
37 #include <mach/mach_types.h>
38 #include <mach/kern_return.h>
39
40 #include <kern/kern_types.h>
41 #include <kern/startup.h>
42 #include <kern/timer_queue.h>
43 #include <kern/processor.h>
44 #include <kern/cpu_number.h>
45 #include <kern/cpu_data.h>
46 #include <kern/assert.h>
47 #include <kern/machine.h>
48 #include <kern/pms.h>
49 #include <kern/misc_protos.h>
50 #include <kern/etimer.h>
51 #include <kern/kalloc.h>
52 #include <kern/queue.h>
53
54 #include <vm/vm_map.h>
55 #include <vm/vm_kern.h>
56
57 #include <profiling/profile-mk.h>
58
59 #include <i386/proc_reg.h>
60 #include <i386/cpu_threads.h>
61 #include <i386/mp_desc.h>
62 #include <i386/misc_protos.h>
63 #include <i386/trap.h>
64 #include <i386/postcode.h>
65 #include <i386/machine_routines.h>
66 #include <i386/mp.h>
67 #include <i386/mp_events.h>
68 #include <i386/lapic.h>
69 #include <i386/cpuid.h>
70 #include <i386/fpu.h>
71 #include <i386/machine_cpu.h>
72 #include <i386/pmCPU.h>
73 #if CONFIG_MCA
74 #include <i386/machine_check.h>
75 #endif
76 #include <i386/acpi.h>
77
78 #include <chud/chud_xnu.h>
79 #include <chud/chud_xnu_private.h>
80
81 #include <sys/kdebug.h>
82 #if MACH_KDB
83 #include <machine/db_machdep.h>
84 #include <ddb/db_aout.h>
85 #include <ddb/db_access.h>
86 #include <ddb/db_sym.h>
87 #include <ddb/db_variables.h>
88 #include <ddb/db_command.h>
89 #include <ddb/db_output.h>
90 #include <ddb/db_expr.h>
91 #endif
92
93 #if MP_DEBUG
94 #define PAUSE delay(1000000)
95 #define DBG(x...) kprintf(x)
96 #else
97 #define DBG(x...)
98 #define PAUSE
99 #endif /* MP_DEBUG */
100
101 /* Debugging/test trace events: */
102 #define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
103 #define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
104 #define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
105 #define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
106 #define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
107
108 #define ABS(v) (((v) > 0)?(v):-(v))
109
110 void slave_boot_init(void);
111 void i386_cpu_IPI(int cpu);
112
113 #if MACH_KDB
114 static void mp_kdb_wait(void);
115 volatile boolean_t mp_kdb_trap = FALSE;
116 volatile long mp_kdb_ncpus = 0;
117 #endif
118
119 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
120 static void mp_rendezvous_action(void);
121 static void mp_broadcast_action(void);
122
123 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
124 static int NMIInterruptHandler(x86_saved_state_t *regs);
125
126 boolean_t smp_initialized = FALSE;
127 uint32_t TSC_sync_margin = 0xFFF;
128 volatile boolean_t force_immediate_debugger_NMI = FALSE;
129 volatile boolean_t pmap_tlb_flush_timeout = FALSE;
130 decl_simple_lock_data(,mp_kdp_lock);
131
132 decl_lck_mtx_data(static, mp_cpu_boot_lock);
133 lck_mtx_ext_t mp_cpu_boot_lock_ext;
134
135 /* Variables needed for MP rendezvous. */
136 decl_simple_lock_data(,mp_rv_lock);
137 static void (*mp_rv_setup_func)(void *arg);
138 static void (*mp_rv_action_func)(void *arg);
139 static void (*mp_rv_teardown_func)(void *arg);
140 static void *mp_rv_func_arg;
141 static volatile int mp_rv_ncpus;
142 /* Cache-aligned barriers: */
143 static volatile long mp_rv_entry __attribute__((aligned(64)));
144 static volatile long mp_rv_exit __attribute__((aligned(64)));
145 static volatile long mp_rv_complete __attribute__((aligned(64)));
146
147 volatile uint64_t debugger_entry_time;
148 volatile uint64_t debugger_exit_time;
149 #if MACH_KDP
150
151 extern int kdp_snapshot;
152 static struct _kdp_xcpu_call_func {
153 kdp_x86_xcpu_func_t func;
154 void *arg0, *arg1;
155 volatile long ret;
156 volatile uint16_t cpu;
157 } kdp_xcpu_call_func = {
158 .cpu = KDP_XCPU_NONE
159 };
160
161 #endif
162
163 /* Variables needed for MP broadcast. */
164 static void (*mp_bc_action_func)(void *arg);
165 static void *mp_bc_func_arg;
166 static int mp_bc_ncpus;
167 static volatile long mp_bc_count;
168 decl_lck_mtx_data(static, mp_bc_lock);
169 lck_mtx_ext_t mp_bc_lock_ext;
170 static volatile int debugger_cpu = -1;
171 volatile long NMIPI_acks = 0;
172
173 static void mp_cpus_call_init(void);
174 static void mp_cpus_call_cpu_init(void);
175 static void mp_cpus_call_action(void);
176 static void mp_call_PM(void);
177
178 char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
179
180 /* PAL-related routines */
181 boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
182 int ipi_vector, i386_intr_func_t ipi_handler);
183 void i386_start_cpu(int lapic_id, int cpu_num);
184 void i386_send_NMI(int cpu);
185
186 #if GPROF
187 /*
188 * Initialize dummy structs for profiling. These aren't used but
189 * allow hertz_tick() to be built with GPROF defined.
190 */
191 struct profile_vars _profile_vars;
192 struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
193 #define GPROF_INIT() \
194 { \
195 int i; \
196 \
197 /* Hack to initialize pointers to unused profiling structs */ \
198 for (i = 1; i < MAX_CPUS; i++) \
199 _profile_vars_cpus[i] = &_profile_vars; \
200 }
201 #else
202 #define GPROF_INIT()
203 #endif /* GPROF */
204
205 static lck_grp_t smp_lck_grp;
206 static lck_grp_attr_t smp_lck_grp_attr;
207
208 #define NUM_CPU_WARM_CALLS 20
209 struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
210 queue_head_t cpu_warm_call_list;
211 decl_simple_lock_data(static, cpu_warm_lock);
212
213 typedef struct cpu_warm_data {
214 timer_call_t cwd_call;
215 uint64_t cwd_deadline;
216 int cwd_result;
217 } *cpu_warm_data_t;
218
219 static void cpu_prewarm_init(void);
220 static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
221 static void _cpu_warm_setup(void *arg);
222 static timer_call_t grab_warm_timer_call(void);
223 static void free_warm_timer_call(timer_call_t call);
224
225 void
226 smp_init(void)
227 {
228 simple_lock_init(&mp_kdp_lock, 0);
229 simple_lock_init(&mp_rv_lock, 0);
230 lck_grp_attr_setdefault(&smp_lck_grp_attr);
231 lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
232 lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
233 lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
234 console_init();
235
236 if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
237 LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
238 return;
239
240 cpu_thread_init();
241
242 GPROF_INIT();
243 DBGLOG_CPU_INIT(master_cpu);
244
245 mp_cpus_call_init();
246 mp_cpus_call_cpu_init();
247
248 if (PE_parse_boot_argn("TSC_sync_margin",
249 &TSC_sync_margin, sizeof(TSC_sync_margin)))
250 kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
251 smp_initialized = TRUE;
252
253 cpu_prewarm_init();
254
255 return;
256 }
257
258 typedef struct {
259 int target_cpu;
260 int target_lapic;
261 int starter_cpu;
262 } processor_start_info_t;
263 static processor_start_info_t start_info __attribute__((aligned(64)));
264
265 /*
266 * Cache-alignment is to avoid cross-cpu false-sharing interference.
267 */
268 static volatile long tsc_entry_barrier __attribute__((aligned(64)));
269 static volatile long tsc_exit_barrier __attribute__((aligned(64)));
270 static volatile uint64_t tsc_target __attribute__((aligned(64)));
271
272 /*
273 * Poll a CPU to see when it has marked itself as running.
274 */
275 static void
276 mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
277 {
278 while (iters-- > 0) {
279 if (cpu_datap(slot_num)->cpu_running)
280 break;
281 delay(usecdelay);
282 }
283 }
284
285 /*
286 * Quickly bring a CPU back online which has been halted.
287 */
288 kern_return_t
289 intel_startCPU_fast(int slot_num)
290 {
291 kern_return_t rc;
292
293 /*
294 * Try to perform a fast restart
295 */
296 rc = pmCPUExitHalt(slot_num);
297 if (rc != KERN_SUCCESS)
298 /*
299 * The CPU was not eligible for a fast restart.
300 */
301 return(rc);
302
303 /*
304 * Wait until the CPU is back online.
305 */
306 mp_disable_preemption();
307
308 /*
309 * We use short pauses (1us) for low latency. 30,000 iterations is
310 * longer than a full restart would require, so it should be more
311 * than long enough.
312 */
313
314 mp_wait_for_cpu_up(slot_num, 30000, 1);
315 mp_enable_preemption();
316
317 /*
318 * Check to make sure that the CPU is really running. If not,
319 * go through the slow path.
320 */
321 if (cpu_datap(slot_num)->cpu_running)
322 return(KERN_SUCCESS);
323 else
324 return(KERN_FAILURE);
325 }
326
327 static void
328 started_cpu(void)
329 {
330 /* Here on the started cpu with cpu_running set TRUE */
331
332 if (TSC_sync_margin &&
333 start_info.target_cpu == cpu_number()) {
334 /*
335 * I've just started up; synchronize again with the starter cpu
336 * and then snap my TSC.
337 */
338 tsc_target = 0;
339 atomic_decl(&tsc_entry_barrier, 1);
340 while (tsc_entry_barrier != 0)
341 ; /* spin for starter and target at barrier */
342 tsc_target = rdtsc64();
343 atomic_decl(&tsc_exit_barrier, 1);
344 }
345 }
346
347 static void
348 start_cpu(void *arg)
349 {
350 int i = 1000;
351 processor_start_info_t *psip = (processor_start_info_t *) arg;
352
353 /* Ignore this if the current processor is not the starter */
354 if (cpu_number() != psip->starter_cpu)
355 return;
356
357 i386_start_cpu(psip->target_lapic, psip->target_cpu);
358
359 #ifdef POSTCODE_DELAY
360 /* Wait much longer if postcodes are displayed for a delay period. */
361 i *= 10000;
362 #endif
363 mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
364 if (TSC_sync_margin &&
365 cpu_datap(psip->target_cpu)->cpu_running) {
366 /*
367 * Compare the TSC from the started processor with ours.
368 * Report and log/panic if it diverges by more than
369 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
370 * can be overridden by boot-arg (with 0 meaning no checking).
371 */
372 uint64_t tsc_starter;
373 int64_t tsc_delta;
374 atomic_decl(&tsc_entry_barrier, 1);
375 while (tsc_entry_barrier != 0)
376 ; /* spin for both processors at barrier */
377 tsc_starter = rdtsc64();
378 atomic_decl(&tsc_exit_barrier, 1);
379 while (tsc_exit_barrier != 0)
380 ; /* spin for target to store its TSC */
381 tsc_delta = tsc_target - tsc_starter;
382 kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
383 psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
384 if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
385 #if DEBUG
386 panic(
387 #else
388 printf(
389 #endif
390 "Unsynchronized TSC for cpu %d: "
391 "0x%016llx, delta 0x%llx\n",
392 psip->target_cpu, tsc_target, tsc_delta);
393 }
394 }
395 }
396
397 extern char prot_mode_gdt[];
398 extern char slave_boot_base[];
399 extern char real_mode_bootstrap_base[];
400 extern char real_mode_bootstrap_end[];
401 extern char slave_boot_end[];
402
403 kern_return_t
404 intel_startCPU(
405 int slot_num)
406 {
407 int lapic = cpu_to_lapic[slot_num];
408 boolean_t istate;
409
410 assert(lapic != -1);
411
412 DBGLOG_CPU_INIT(slot_num);
413
414 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
415 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
416
417 /*
418 * Initialize (or re-initialize) the descriptor tables for this cpu.
419 * Propagate processor mode to slave.
420 */
421 if (cpu_mode_is64bit())
422 cpu_desc_init64(cpu_datap(slot_num));
423 else
424 cpu_desc_init(cpu_datap(slot_num));
425
426 /* Serialize use of the slave boot stack, etc. */
427 lck_mtx_lock(&mp_cpu_boot_lock);
428
429 istate = ml_set_interrupts_enabled(FALSE);
430 if (slot_num == get_cpu_number()) {
431 ml_set_interrupts_enabled(istate);
432 lck_mtx_unlock(&mp_cpu_boot_lock);
433 return KERN_SUCCESS;
434 }
435
436 start_info.starter_cpu = cpu_number();
437 start_info.target_cpu = slot_num;
438 start_info.target_lapic = lapic;
439 tsc_entry_barrier = 2;
440 tsc_exit_barrier = 2;
441
442 /*
443 * Perform the processor startup sequence with all running
444 * processors rendezvous'ed. This is required during periods when
445 * the cache-disable bit is set for MTRR/PAT initialization.
446 */
447 mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
448
449 start_info.target_cpu = 0;
450
451 ml_set_interrupts_enabled(istate);
452 lck_mtx_unlock(&mp_cpu_boot_lock);
453
454 if (!cpu_datap(slot_num)->cpu_running) {
455 kprintf("Failed to start CPU %02d\n", slot_num);
456 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
457 delay(1000000);
458 halt_cpu();
459 return KERN_SUCCESS;
460 } else {
461 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
462 return KERN_SUCCESS;
463 }
464 }
465
466 #if MP_DEBUG
467 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
468 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
469
470 MP_EVENT_NAME_DECL();
471
472 #endif /* MP_DEBUG */
473
474 int
475 cpu_signal_handler(x86_saved_state_t *regs)
476 {
477 int my_cpu;
478 volatile int *my_word;
479 #if MACH_KDB && MACH_ASSERT
480 int i=100;
481 #endif /* MACH_KDB && MACH_ASSERT */
482
483 SCHED_STATS_IPI(current_processor());
484
485 my_cpu = cpu_number();
486 my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
487 /* Store the initial set of signals for diagnostics. New
488 * signals could arrive while these are being processed
489 * so it's no more than a hint.
490 */
491
492 cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
493
494 do {
495 #if MACH_KDB && MACH_ASSERT
496 if (i-- <= 0)
497 Debugger("cpu_signal_handler: signals did not clear");
498 #endif /* MACH_KDB && MACH_ASSERT */
499 #if MACH_KDP
500 if (i_bit(MP_KDP, my_word)) {
501 DBGLOG(cpu_handle,my_cpu,MP_KDP);
502 i_bit_clear(MP_KDP, my_word);
503 /* Ensure that the i386_kernel_state at the base of the
504 * current thread's stack (if any) is synchronized with the
505 * context at the moment of the interrupt, to facilitate
506 * access through the debugger.
507 */
508 sync_iss_to_iks(regs);
509 if (pmsafe_debug && !kdp_snapshot)
510 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
511 mp_kdp_wait(TRUE, FALSE);
512 if (pmsafe_debug && !kdp_snapshot)
513 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
514 } else
515 #endif /* MACH_KDP */
516 if (i_bit(MP_TLB_FLUSH, my_word)) {
517 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
518 i_bit_clear(MP_TLB_FLUSH, my_word);
519 pmap_update_interrupt();
520 } else if (i_bit(MP_AST, my_word)) {
521 DBGLOG(cpu_handle,my_cpu,MP_AST);
522 i_bit_clear(MP_AST, my_word);
523 ast_check(cpu_to_processor(my_cpu));
524 #if MACH_KDB
525 } else if (i_bit(MP_KDB, my_word)) {
526
527 i_bit_clear(MP_KDB, my_word);
528 current_cpu_datap()->cpu_kdb_is_slave++;
529 mp_kdb_wait();
530 current_cpu_datap()->cpu_kdb_is_slave--;
531 #endif /* MACH_KDB */
532 } else if (i_bit(MP_RENDEZVOUS, my_word)) {
533 DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
534 i_bit_clear(MP_RENDEZVOUS, my_word);
535 mp_rendezvous_action();
536 } else if (i_bit(MP_BROADCAST, my_word)) {
537 DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
538 i_bit_clear(MP_BROADCAST, my_word);
539 mp_broadcast_action();
540 } else if (i_bit(MP_CHUD, my_word)) {
541 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
542 i_bit_clear(MP_CHUD, my_word);
543 chudxnu_cpu_signal_handler();
544 } else if (i_bit(MP_CALL, my_word)) {
545 DBGLOG(cpu_handle,my_cpu,MP_CALL);
546 i_bit_clear(MP_CALL, my_word);
547 mp_cpus_call_action();
548 } else if (i_bit(MP_CALL_PM, my_word)) {
549 DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
550 i_bit_clear(MP_CALL_PM, my_word);
551 mp_call_PM();
552 }
553 } while (*my_word);
554
555 return 0;
556 }
557
558 static int
559 NMIInterruptHandler(x86_saved_state_t *regs)
560 {
561 void *stackptr;
562
563 if (panic_active() && !panicDebugging) {
564 if (pmsafe_debug)
565 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
566 for(;;)
567 cpu_pause();
568 }
569
570 atomic_incl(&NMIPI_acks, 1);
571 sync_iss_to_iks_unconditionally(regs);
572 #if defined (__i386__)
573 __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
574 #elif defined (__x86_64__)
575 __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
576 #endif
577
578 if (cpu_number() == debugger_cpu)
579 goto NMExit;
580
581 if (spinlock_timed_out) {
582 char pstr[160];
583 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
584 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
585 } else if (pmap_tlb_flush_timeout == TRUE) {
586 char pstr[128];
587 snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:%d\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
588 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
589 }
590
591 #if MACH_KDP
592 if (pmsafe_debug && !kdp_snapshot)
593 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
594 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
595 mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
596 if (pmsafe_debug && !kdp_snapshot)
597 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
598 #endif
599 NMExit:
600 return 1;
601 }
602
603
604 /*
605 * cpu_interrupt is really just to be used by the scheduler to
606 * get a CPU's attention; it may not always issue an IPI. If an
607 * IPI is always needed, use i386_cpu_IPI() instead.
608 */
609 void
610 cpu_interrupt(int cpu)
611 {
612 boolean_t did_IPI = FALSE;
613
614 if (smp_initialized
615 && pmCPUExitIdle(cpu_datap(cpu))) {
616 i386_cpu_IPI(cpu);
617 did_IPI = TRUE;
618 }
619
620 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
621 }
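
/*
 * Illustrative sketch (not part of this file): the distinction documented
 * above.  example_nudge() and its must_interrupt flag are hypothetical; a
 * caller that merely wants to wake a CPU out of idle can use
 * cpu_interrupt(), while one that must always deliver an IPI calls
 * i386_cpu_IPI() directly.
 */
#if 0	/* example only */
static void
example_nudge(int cpu, boolean_t must_interrupt)
{
	if (must_interrupt)
		i386_cpu_IPI(cpu);	/* unconditionally sends the IPI */
	else
		cpu_interrupt(cpu);	/* may skip the IPI if the CPU is already leaving idle */
}
#endif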
622
623 /*
624 * Send a true NMI via the local APIC to the specified CPU.
625 */
626 void
627 cpu_NMI_interrupt(int cpu)
628 {
629 if (smp_initialized) {
630 i386_send_NMI(cpu);
631 }
632 }
633
634 static void (* volatile mp_PM_func)(void) = NULL;
635
636 static void
637 mp_call_PM(void)
638 {
639 assert(!ml_get_interrupts_enabled());
640
641 if (mp_PM_func != NULL)
642 mp_PM_func();
643 }
644
645 void
646 cpu_PM_interrupt(int cpu)
647 {
648 assert(!ml_get_interrupts_enabled());
649
650 if (mp_PM_func != NULL) {
651 if (cpu == cpu_number())
652 mp_PM_func();
653 else
654 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
655 }
656 }
657
658 void
659 PM_interrupt_register(void (*fn)(void))
660 {
661 mp_PM_func = fn;
662 }
663
664 void
665 i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
666 {
667 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
668 uint64_t tsc_timeout;
669
670
671 if (!cpu_datap(cpu)->cpu_running)
672 return;
673
674 if (event == MP_TLB_FLUSH)
675 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
676
677 DBGLOG(cpu_signal, cpu, event);
678
679 i_bit_set(event, signals);
680 i386_cpu_IPI(cpu);
681 if (mode == SYNC) {
682 again:
683 tsc_timeout = rdtsc64() + (1000*1000*1000);
684 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
685 cpu_pause();
686 }
687 if (i_bit(event, signals)) {
688 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
689 cpu, event);
690 goto again;
691 }
692 }
693 if (event == MP_TLB_FLUSH)
694 KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
695 }
696
697 /*
698 * Send event to all running cpus.
699 * Called with the topology locked.
700 */
701 void
702 i386_signal_cpus(mp_event_t event, mp_sync_t mode)
703 {
704 unsigned int cpu;
705 unsigned int my_cpu = cpu_number();
706
707 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
708
709 for (cpu = 0; cpu < real_ncpus; cpu++) {
710 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
711 continue;
712 i386_signal_cpu(cpu, event, mode);
713 }
714 }
715
716 /*
717 * Return the number of running cpus.
718 * Called with the topology locked.
719 */
720 int
721 i386_active_cpus(void)
722 {
723 unsigned int cpu;
724 unsigned int ncpus = 0;
725
726 assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
727
728 for (cpu = 0; cpu < real_ncpus; cpu++) {
729 if (cpu_datap(cpu)->cpu_running)
730 ncpus++;
731 }
732 return(ncpus);
733 }
734
735 /*
736 * All-CPU rendezvous:
737 * - CPUs are signalled,
738 * - all execute the setup function (if specified),
739 * - rendezvous (i.e. all cpus reach a barrier),
740 * - all execute the action function (if specified),
741 * - rendezvous again,
742 * - execute the teardown function (if specified), and then
743 * - resume.
744 *
745 * Note that the supplied external functions _must_ be reentrant and aware
746 * that they are running in parallel and in an unknown lock context.
747 */
748
749 static void
750 mp_rendezvous_action(void)
751 {
752 boolean_t intrs_enabled;
753
754 /* setup function */
755 if (mp_rv_setup_func != NULL)
756 mp_rv_setup_func(mp_rv_func_arg);
757
758 intrs_enabled = ml_get_interrupts_enabled();
759
760 /* spin on entry rendezvous */
761 atomic_incl(&mp_rv_entry, 1);
762 while (mp_rv_entry < mp_rv_ncpus) {
763 /* poll for pesky tlb flushes if interrupts disabled */
764 if (!intrs_enabled)
765 handle_pending_TLB_flushes();
766 cpu_pause();
767 }
768
769 /* action function */
770 if (mp_rv_action_func != NULL)
771 mp_rv_action_func(mp_rv_func_arg);
772
773 /* spin on exit rendezvous */
774 atomic_incl(&mp_rv_exit, 1);
775 while (mp_rv_exit < mp_rv_ncpus) {
776 if (!intrs_enabled)
777 handle_pending_TLB_flushes();
778 cpu_pause();
779 }
780
781 /* teardown function */
782 if (mp_rv_teardown_func != NULL)
783 mp_rv_teardown_func(mp_rv_func_arg);
784
785 /* Bump completion count */
786 atomic_incl(&mp_rv_complete, 1);
787 }
788
789 void
790 mp_rendezvous(void (*setup_func)(void *),
791 void (*action_func)(void *),
792 void (*teardown_func)(void *),
793 void *arg)
794 {
795
796 if (!smp_initialized) {
797 if (setup_func != NULL)
798 setup_func(arg);
799 if (action_func != NULL)
800 action_func(arg);
801 if (teardown_func != NULL)
802 teardown_func(arg);
803 return;
804 }
805
806 /* obtain rendezvous lock */
807 simple_lock(&mp_rv_lock);
808
809 /* set static function pointers */
810 mp_rv_setup_func = setup_func;
811 mp_rv_action_func = action_func;
812 mp_rv_teardown_func = teardown_func;
813 mp_rv_func_arg = arg;
814
815 mp_rv_entry = 0;
816 mp_rv_exit = 0;
817 mp_rv_complete = 0;
818
819 /*
820 * signal other processors, which will call mp_rendezvous_action()
821 * with interrupts disabled
822 */
823 simple_lock(&x86_topo_lock);
824 mp_rv_ncpus = i386_active_cpus();
825 i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
826 simple_unlock(&x86_topo_lock);
827
828 /* call executor function on this cpu */
829 mp_rendezvous_action();
830
831 /*
832 * Spin for everyone to complete.
833 * This is necessary to ensure that all processors have proceeded
834 * from the exit barrier before we release the rendezvous structure.
835 */
836 while (mp_rv_complete < mp_rv_ncpus) {
837 cpu_pause();
838 }
839
840 /* Tidy up */
841 mp_rv_setup_func = NULL;
842 mp_rv_action_func = NULL;
843 mp_rv_teardown_func = NULL;
844 mp_rv_func_arg = NULL;
845
846 /* release lock */
847 simple_unlock(&mp_rv_lock);
848 }
849
850 void
851 mp_rendezvous_break_lock(void)
852 {
853 simple_lock_init(&mp_rv_lock, 0);
854 }
855
856 static void
857 setup_disable_intrs(__unused void * param_not_used)
858 {
859 /* disable interrupts before the first barrier */
860 boolean_t intr = ml_set_interrupts_enabled(FALSE);
861
862 current_cpu_datap()->cpu_iflag = intr;
863 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
864 }
865
866 static void
867 teardown_restore_intrs(__unused void * param_not_used)
868 {
869 /* restore interrupt flag following MTRR changes */
870 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
871 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
872 }
873
874 /*
875 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
876 * This is exported for use by kexts.
877 */
878 void
879 mp_rendezvous_no_intrs(
880 void (*action_func)(void *),
881 void *arg)
882 {
883 mp_rendezvous(setup_disable_intrs,
884 action_func,
885 teardown_restore_intrs,
886 arg);
887 }
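
/*
 * Illustrative sketch (not part of this file): how a caller (e.g. MTRR/PAT
 * update code) might use mp_rendezvous_no_intrs() to run an action on every
 * running CPU with interrupts disabled.  example_action() and
 * example_state_t are hypothetical stand-ins, not symbols from this file.
 */
#if 0	/* example only */
typedef struct { int value; } example_state_t;

static void
example_action(void *arg)
{
	/*
	 * Runs on every running CPU, in parallel, between the entry and
	 * exit barriers.  It must be reentrant and must not assume any
	 * particular lock context.
	 */
	example_state_t *state = (example_state_t *) arg;
	(void) state;
}

static void
example_update_all_cpus(example_state_t *state)
{
	/* Blocks until every running CPU has executed example_action(). */
	mp_rendezvous_no_intrs(example_action, (void *) state);
}
#endif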
888
889
890 typedef struct {
891 queue_chain_t link; /* queue linkage */
892 void (*func)(void *,void *); /* routine to call */
893 void *arg0; /* routine's 1st arg */
894 void *arg1; /* routine's 2nd arg */
895 volatile long *countp; /* completion counter */
896 } mp_call_t;
897
898 #define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
899 static queue_head_t mp_cpus_call_freelist;
900 static queue_head_t mp_cpus_call_queue[MAX_CPUS];
901 /*
902 * The free list and the per-cpu call queues are protected by the following
903 * lock, which is taken with interrupts disabled.
904 */
905 decl_simple_lock_data(,mp_cpus_call_lock);
906
907 static inline boolean_t
908 mp_call_lock(void)
909 {
910 boolean_t intrs_enabled;
911
912 intrs_enabled = ml_set_interrupts_enabled(FALSE);
913 simple_lock(&mp_cpus_call_lock);
914
915 return intrs_enabled;
916 }
917
918 static inline boolean_t
919 mp_call_is_locked(void)
920 {
921 return !ml_get_interrupts_enabled() &&
922 hw_lock_held((hw_lock_t)&mp_cpus_call_lock);
923 }
924
925 static inline void
926 mp_call_unlock(boolean_t intrs_enabled)
927 {
928 simple_unlock(&mp_cpus_call_lock);
929 ml_set_interrupts_enabled(intrs_enabled);
930 }
931
932 static inline mp_call_t *
933 mp_call_alloc(void)
934 {
935 mp_call_t *callp;
936
937 assert(mp_call_is_locked());
938 if (queue_empty(&mp_cpus_call_freelist))
939 return NULL;
940 queue_remove_first(&mp_cpus_call_freelist, callp, typeof(callp), link);
941 return callp;
942 }
943
944 static inline void
945 mp_call_free(mp_call_t *callp)
946 {
947 assert(mp_call_is_locked());
948 queue_enter_first(&mp_cpus_call_freelist, callp, typeof(callp), link);
949 }
950
951 static inline mp_call_t *
952 mp_call_dequeue(queue_t call_queue)
953 {
954 mp_call_t *callp;
955
956 assert(mp_call_is_locked());
957 if (queue_empty(call_queue))
958 return NULL;
959 queue_remove_first(call_queue, callp, typeof(callp), link);
960 return callp;
961 }
962
963 /* Called on the boot processor to initialize global structures */
964 static void
965 mp_cpus_call_init(void)
966 {
967 DBG("mp_cpus_call_init()\n");
968 simple_lock_init(&mp_cpus_call_lock, 0);
969 queue_init(&mp_cpus_call_freelist);
970 }
971
972 /*
973 * Called by each processor to add call buffers to the free list
974 * and to initialize the per-cpu call queue.
975 * Also called but ignored on slave processors on re-start/wake.
976 */
977 static void
978 mp_cpus_call_cpu_init(void)
979 {
980 boolean_t intrs_enabled;
981 int i;
982 mp_call_t *callp;
983
984 if (mp_cpus_call_queue[cpu_number()].next != NULL)
985 return; /* restart/wake case: called already */
986
987 queue_init(&mp_cpus_call_queue[cpu_number()]);
988 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
989 callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
990 intrs_enabled = mp_call_lock();
991 mp_call_free(callp);
992 mp_call_unlock(intrs_enabled);
993 }
994
995 DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
996 }
997
998 /*
999 * This is called from cpu_signal_handler() to process an MP_CALL signal.
1000 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
1001 */
1002 static void
1003 mp_cpus_call_action(void)
1004 {
1005 queue_t cpu_head;
1006 boolean_t intrs_enabled;
1007 mp_call_t *callp;
1008 mp_call_t call;
1009
1010 assert(!ml_get_interrupts_enabled());
1011 cpu_head = &mp_cpus_call_queue[cpu_number()];
1012 intrs_enabled = mp_call_lock();
1013 while ((callp = mp_call_dequeue(cpu_head)) != NULL) {
1014 /* Copy call request to the stack to free buffer */
1015 call = *callp;
1016 mp_call_free(callp);
1017 if (call.func != NULL) {
1018 mp_call_unlock(intrs_enabled);
1019 KERNEL_DEBUG_CONSTANT(
1020 TRACE_MP_CPUS_CALL_ACTION,
1021 call.func, call.arg0, call.arg1, call.countp, 0);
1022 call.func(call.arg0, call.arg1);
1023 (void) mp_call_lock();
1024 }
1025 if (call.countp != NULL)
1026 atomic_incl(call.countp, 1);
1027 }
1028 mp_call_unlock(intrs_enabled);
1029 }
1030
1031 static boolean_t
1032 mp_call_queue(
1033 int cpu,
1034 void (*action_func)(void *, void *),
1035 void *arg0,
1036 void *arg1,
1037 volatile long *countp)
1038 {
1039 queue_t cpu_head = &mp_cpus_call_queue[cpu];
1040 mp_call_t *callp;
1041
1042 assert(mp_call_is_locked());
1043 callp = mp_call_alloc();
1044 if (callp == NULL)
1045 return FALSE;
1046
1047 callp->func = action_func;
1048 callp->arg0 = arg0;
1049 callp->arg1 = arg1;
1050 callp->countp = countp;
1051
1052 queue_enter(cpu_head, callp, typeof(callp), link);
1053
1054 return TRUE;
1055 }
1056
1057 /*
1058 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1059 * Possible modes are:
1060 * SYNC: function is called serially on target cpus in logical cpu order
1061 * waiting for each call to be acknowledged before proceeding
1062 * ASYNC: function call is queued to the specified cpus
1063 * waiting for all calls to complete in parallel before returning
1064 * NOSYNC: function calls are queued
1065 * but we return before confirmation of calls completing.
1066 * The action function may be NULL.
1067 * The cpu mask may include the local cpu. Offline cpus are ignored.
1068 * The return value is the number of cpus on which the call was made or queued.
1069 */
1070 cpu_t
1071 mp_cpus_call(
1072 cpumask_t cpus,
1073 mp_sync_t mode,
1074 void (*action_func)(void *),
1075 void *arg)
1076 {
1077 return mp_cpus_call1(
1078 cpus,
1079 mode,
1080 (void (*)(void *,void *))action_func,
1081 arg,
1082 NULL,
1083 NULL,
1084 NULL);
1085 }
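
/*
 * Illustrative sketch (not part of this file): calling a function on a
 * specific CPU set with mp_cpus_call().  example_poke() is hypothetical;
 * the mask here targets only the master cpu, as other callers in this file
 * do, and ASYNC waits for all queued calls to complete before returning.
 */
#if 0	/* example only */
static void
example_poke(void *arg)
{
	/* Runs on each target CPU with interrupts disabled. */
	(void) arg;
}

static void
example_call_master(void)
{
	cpu_t	ncalled;

	ncalled = mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC,
			       example_poke, NULL);
	kprintf("example_poke ran or was queued on %d cpu(s)\n", ncalled);
}
#endif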
1086
1087 static void
1088 mp_cpus_call_wait(boolean_t intrs_enabled,
1089 long mp_cpus_signals,
1090 volatile long *mp_cpus_calls)
1091 {
1092 queue_t cpu_head;
1093
1094 cpu_head = &mp_cpus_call_queue[cpu_number()];
1095
1096 while (*mp_cpus_calls < mp_cpus_signals) {
1097 if (!intrs_enabled) {
1098 if (!queue_empty(cpu_head))
1099 mp_cpus_call_action();
1100
1101 handle_pending_TLB_flushes();
1102 }
1103 cpu_pause();
1104 }
1105 }
1106
1107 cpu_t
1108 mp_cpus_call1(
1109 cpumask_t cpus,
1110 mp_sync_t mode,
1111 void (*action_func)(void *, void *),
1112 void *arg0,
1113 void *arg1,
1114 cpumask_t *cpus_calledp,
1115 cpumask_t *cpus_notcalledp)
1116 {
1117 cpu_t cpu;
1118 boolean_t intrs_enabled = FALSE;
1119 boolean_t call_self = FALSE;
1120 cpumask_t cpus_called = 0;
1121 cpumask_t cpus_notcalled = 0;
1122 long mp_cpus_signals = 0;
1123 volatile long mp_cpus_calls = 0;
1124
1125 KERNEL_DEBUG_CONSTANT(
1126 TRACE_MP_CPUS_CALL | DBG_FUNC_START,
1127 cpus, mode, action_func, arg0, arg1);
1128
1129 if (!smp_initialized) {
1130 if ((cpus & CPUMASK_SELF) == 0)
1131 goto out;
1132 if (action_func != NULL) {
1133 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1134 action_func(arg0, arg1);
1135 ml_set_interrupts_enabled(intrs_enabled);
1136 }
1137 call_self = TRUE;
1138 goto out;
1139 }
1140
1141 /*
1142 * Queue the call for each non-local requested cpu.
1143 * The topo lock is not taken. Instead we sniff the cpu_running state
1144 * and then re-check it after taking the call lock. A cpu being taken
1145 * offline runs the action function after clearing the cpu_running.
1146 */
1147 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1148 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1149 !cpu_datap(cpu)->cpu_running)
1150 continue;
1151 if (cpu == (cpu_t) cpu_number()) {
1152 /*
1153 * We don't IPI ourself and if calling asynchronously,
1154 * we defer our call until we have signalled all others.
1155 */
1156 call_self = TRUE;
1157 cpus_called |= cpu_to_cpumask(cpu);
1158 if (mode == SYNC && action_func != NULL) {
1159 KERNEL_DEBUG_CONSTANT(
1160 TRACE_MP_CPUS_CALL_LOCAL,
1161 action_func, arg0, arg1, 0, 0);
1162 action_func(arg0, arg1);
1163 }
1164 } else {
1165 /*
1166 * Here to queue a call to cpu and IPI.
1167 * Spinning for request buffer unless NOSYNC.
1168 */
1169 queue_call:
1170 intrs_enabled = mp_call_lock();
1171 if (!cpu_datap(cpu)->cpu_running) {
1172 mp_call_unlock(intrs_enabled);
1173 continue;
1174 }
1175 if (mode == NOSYNC) {
1176 if (!mp_call_queue(cpu, action_func, arg0, arg1,
1177 NULL)) {
1178 cpus_notcalled |= cpu_to_cpumask(cpu);
1179 mp_call_unlock(intrs_enabled);
1180 KERNEL_DEBUG_CONSTANT(
1181 TRACE_MP_CPUS_CALL_NOBUF,
1182 cpu, 0, 0, 0, 0);
1183 continue;
1184 }
1185 } else {
1186 if (!mp_call_queue(cpu, action_func, arg0, arg1,
1187 &mp_cpus_calls)) {
1188 mp_call_unlock(intrs_enabled);
1189 KERNEL_DEBUG_CONSTANT(
1190 TRACE_MP_CPUS_CALL_NOBUF,
1191 cpu, 0, 0, 0, 0);
1192 if (!intrs_enabled) {
1193 mp_cpus_call_action();
1194 handle_pending_TLB_flushes();
1195 }
1196 cpu_pause();
1197 goto queue_call;
1198 }
1199 }
1200 mp_cpus_signals++;
1201 cpus_called |= cpu_to_cpumask(cpu);
1202 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1203 mp_call_unlock(intrs_enabled);
1204 if (mode == SYNC) {
1205 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1206 }
1207 }
1208 }
1209
1210 /* Call locally if mode not SYNC */
1211 if (mode != SYNC && call_self ) {
1212 KERNEL_DEBUG_CONSTANT(
1213 TRACE_MP_CPUS_CALL_LOCAL,
1214 action_func, arg0, arg1, 0, 0);
1215 if (action_func != NULL) {
1216 ml_set_interrupts_enabled(FALSE);
1217 action_func(arg0, arg1);
1218 ml_set_interrupts_enabled(intrs_enabled);
1219 }
1220 }
1221
1222 /* For ASYNC, now wait for all signaled cpus to complete their calls */
1223 if (mode == ASYNC) {
1224 mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
1225 }
1226
1227 out:
1228 cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);
1229
1230 if (cpus_calledp)
1231 *cpus_calledp = cpus_called;
1232 if (cpus_notcalledp)
1233 *cpus_notcalledp = cpus_notcalled;
1234
1235 KERNEL_DEBUG_CONSTANT(
1236 TRACE_MP_CPUS_CALL | DBG_FUNC_END,
1237 cpu, cpus_called, cpus_notcalled, 0, 0);
1238
1239 return cpu;
1240 }
1241
1242
1243 static void
1244 mp_broadcast_action(void)
1245 {
1246 /* call action function */
1247 if (mp_bc_action_func != NULL)
1248 mp_bc_action_func(mp_bc_func_arg);
1249
1250 /* if we're the last one through, wake up the instigator */
1251 if (atomic_decl_and_test(&mp_bc_count, 1))
1252 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1253 }
1254
1255 /*
1256 * mp_broadcast() runs a given function on all active cpus.
1257 * The caller blocks until the function has run on all cpus.
1258 * The caller will also block if there is another pending broadcast.
1259 */
1260 void
1261 mp_broadcast(
1262 void (*action_func)(void *),
1263 void *arg)
1264 {
1265 if (!smp_initialized) {
1266 if (action_func != NULL)
1267 action_func(arg);
1268 return;
1269 }
1270
1271 /* obtain broadcast lock */
1272 lck_mtx_lock(&mp_bc_lock);
1273
1274 /* set static function pointers */
1275 mp_bc_action_func = action_func;
1276 mp_bc_func_arg = arg;
1277
1278 assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);
1279
1280 /*
1281 * signal other processors, which will call mp_broadcast_action()
1282 */
1283 simple_lock(&x86_topo_lock);
1284 mp_bc_ncpus = i386_active_cpus(); /* total including this cpu */
1285 mp_bc_count = mp_bc_ncpus;
1286 i386_signal_cpus(MP_BROADCAST, ASYNC);
1287
1288 /* call executor function on this cpu */
1289 mp_broadcast_action();
1290 simple_unlock(&x86_topo_lock);
1291
1292 /* block for all cpus to have run action_func */
1293 if (mp_bc_ncpus > 1)
1294 thread_block(THREAD_CONTINUE_NULL);
1295 else
1296 clear_wait(current_thread(), THREAD_AWAKENED);
1297
1298 /* release lock */
1299 lck_mtx_unlock(&mp_bc_lock);
1300 }
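
/*
 * Illustrative sketch (not part of this file): running a function once on
 * every active CPU with mp_broadcast() and blocking until all have run it.
 * example_visits and example_count() are hypothetical.
 */
#if 0	/* example only */
static volatile long example_visits;

static void
example_count(void *arg)
{
	(void) arg;
	atomic_incl(&example_visits, 1);	/* once per active CPU */
}

static void
example_broadcast(void)
{
	example_visits = 0;
	mp_broadcast(example_count, NULL);	/* returns after all CPUs ran it */
}
#endif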
1301
1302 void
1303 i386_activate_cpu(void)
1304 {
1305 cpu_data_t *cdp = current_cpu_datap();
1306
1307 assert(!ml_get_interrupts_enabled());
1308
1309 if (!smp_initialized) {
1310 cdp->cpu_running = TRUE;
1311 return;
1312 }
1313
1314 simple_lock(&x86_topo_lock);
1315 cdp->cpu_running = TRUE;
1316 started_cpu();
1317 simple_unlock(&x86_topo_lock);
1318 }
1319
1320 extern void etimer_timer_expire(void *arg);
1321
1322 void
1323 i386_deactivate_cpu(void)
1324 {
1325 cpu_data_t *cdp = current_cpu_datap();
1326
1327 assert(!ml_get_interrupts_enabled());
1328
1329 simple_lock(&x86_topo_lock);
1330 cdp->cpu_running = FALSE;
1331 simple_unlock(&x86_topo_lock);
1332
1333 timer_queue_shutdown(&cdp->rtclock_timer.queue);
1334 cdp->rtclock_timer.deadline = EndOfAllTime;
1335 mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);
1336
1337 /*
1338 * In case a rendezvous/broadcast/call was initiated to this cpu
1339 * before we cleared cpu_running, we must perform any actions due.
1340 */
1341 if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
1342 mp_rendezvous_action();
1343 if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
1344 mp_broadcast_action();
1345 if (i_bit(MP_CALL, &cdp->cpu_signals))
1346 mp_cpus_call_action();
1347 cdp->cpu_signals = 0; /* all clear */
1348 }
1349
1350 int pmsafe_debug = 1;
1351
1352 #if MACH_KDP
1353 volatile boolean_t mp_kdp_trap = FALSE;
1354 volatile unsigned long mp_kdp_ncpus;
1355 boolean_t mp_kdp_state;
1356
1357
1358 void
1359 mp_kdp_enter(void)
1360 {
1361 unsigned int cpu;
1362 unsigned int ncpus = 0;
1363 unsigned int my_cpu;
1364 uint64_t tsc_timeout;
1365
1366 DBG("mp_kdp_enter()\n");
1367
1368 /*
1369 * Here to enter the debugger.
1370 * In case of races, only one cpu is allowed to enter kdp after
1371 * stopping others.
1372 */
1373 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1374 my_cpu = cpu_number();
1375 cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1376 simple_lock(&mp_kdp_lock);
1377
1378 if (pmsafe_debug && !kdp_snapshot)
1379 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1380
1381 while (mp_kdp_trap) {
1382 simple_unlock(&mp_kdp_lock);
1383 DBG("mp_kdp_enter() race lost\n");
1384 #if MACH_KDP
1385 mp_kdp_wait(TRUE, FALSE);
1386 #endif
1387 simple_lock(&mp_kdp_lock);
1388 }
1389 debugger_cpu = my_cpu;
1390 ncpus = 1;
1391 mp_kdp_ncpus = 1; /* self */
1392 mp_kdp_trap = TRUE;
1393 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1394 simple_unlock(&mp_kdp_lock);
1395
1396 /*
1397 * Deliver a nudge to other cpus, counting how many
1398 */
1399 DBG("mp_kdp_enter() signaling other processors\n");
1400 if (force_immediate_debugger_NMI == FALSE) {
1401 for (cpu = 0; cpu < real_ncpus; cpu++) {
1402 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1403 continue;
1404 ncpus++;
1405 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1406 }
1407 /*
1408 * Wait for other processors to synchronize
1409 */
1410 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1411
1412 /*
1413 * This timeout is rather arbitrary; we don't want to NMI
1414 * processors that are executing at potentially
1415 * "unsafe-to-interrupt" points such as the trampolines,
1416 * but neither do we want to lose state by waiting too long.
1417 */
1418 tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000);
1419
1420 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1421 /*
1422 * A TLB shootdown request may be pending--this would
1423 * result in the requesting processor waiting in
1424 * PMAP_UPDATE_TLBS() until this processor deals with it.
1425 * Process it, so it can now enter mp_kdp_wait()
1426 */
1427 handle_pending_TLB_flushes();
1428 cpu_pause();
1429 }
1430 /* If we've timed out, and some processor(s) are still unresponsive,
1431 * interrupt them with an NMI via the local APIC.
1432 */
1433 if (mp_kdp_ncpus != ncpus) {
1434 for (cpu = 0; cpu < real_ncpus; cpu++) {
1435 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1436 continue;
1437 if (cpu_signal_pending(cpu, MP_KDP))
1438 cpu_NMI_interrupt(cpu);
1439 }
1440 }
1441 }
1442 else
1443 for (cpu = 0; cpu < real_ncpus; cpu++) {
1444 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1445 continue;
1446 cpu_NMI_interrupt(cpu);
1447 }
1448
1449 DBG("mp_kdp_enter() %lu processors done %s\n",
1450 mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1451
1452 postcode(MP_KDP_ENTER);
1453 }
1454
1455 static boolean_t
1456 cpu_signal_pending(int cpu, mp_event_t event)
1457 {
1458 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1459 boolean_t retval = FALSE;
1460
1461 if (i_bit(event, signals))
1462 retval = TRUE;
1463 return retval;
1464 }
1465
1466 long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1467 void *arg0, void *arg1)
1468 {
1469 if (lcpu > (real_ncpus - 1))
1470 return -1;
1471
1472 if (func == NULL)
1473 return -1;
1474
1475 kdp_xcpu_call_func.func = func;
1476 kdp_xcpu_call_func.ret = -1;
1477 kdp_xcpu_call_func.arg0 = arg0;
1478 kdp_xcpu_call_func.arg1 = arg1;
1479 kdp_xcpu_call_func.cpu = lcpu;
1480 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1481 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
1482 cpu_pause();
1483 return kdp_xcpu_call_func.ret;
1484 }
1485
1486 static void
1487 kdp_x86_xcpu_poll(void)
1488 {
1489 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
1490 kdp_xcpu_call_func.ret =
1491 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
1492 kdp_xcpu_call_func.arg1,
1493 cpu_number());
1494 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
1495 }
1496 }
1497
1498 static void
1499 mp_kdp_wait(boolean_t flush, boolean_t isNMI)
1500 {
1501 DBG("mp_kdp_wait()\n");
1502 /* If an I/O port has been specified as a debugging aid, issue a read */
1503 panic_io_port_read();
1504
1505 #if CONFIG_MCA
1506 /* If we've trapped due to a machine-check, save MCA registers */
1507 mca_check_save();
1508 #endif
1509
1510 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1511 while (mp_kdp_trap || (isNMI == TRUE)) {
1512 /*
1513 * A TLB shootdown request may be pending--this would result
1514 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1515 * until this processor handles it.
1516 * Process it, so it can now enter mp_kdp_wait()
1517 */
1518 if (flush)
1519 handle_pending_TLB_flushes();
1520
1521 kdp_x86_xcpu_poll();
1522 cpu_pause();
1523 }
1524
1525 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1526 DBG("mp_kdp_wait() done\n");
1527 }
1528
1529 void
1530 mp_kdp_exit(void)
1531 {
1532 DBG("mp_kdp_exit()\n");
1533 debugger_cpu = -1;
1534 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1535
1536 debugger_exit_time = mach_absolute_time();
1537
1538 mp_kdp_trap = FALSE;
1539 __asm__ volatile("mfence");
1540
1541 /* Wait for other processors to stop spinning. XXX needs timeout */
1542 DBG("mp_kdp_exit() waiting for processors to resume\n");
1543 while (mp_kdp_ncpus > 0) {
1544 /*
1545 * a TLB shootdown request may be pending... this would result in the requesting
1546 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1547 * Process it, so it can now enter mp_kdp_wait()
1548 */
1549 handle_pending_TLB_flushes();
1550
1551 cpu_pause();
1552 }
1553
1554 if (pmsafe_debug && !kdp_snapshot)
1555 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1556
1557 debugger_exit_time = mach_absolute_time();
1558
1559 DBG("mp_kdp_exit() done\n");
1560 (void) ml_set_interrupts_enabled(mp_kdp_state);
1561 postcode(0);
1562 }
1563 #endif /* MACH_KDP */
1564
1565 boolean_t
1566 mp_recent_debugger_activity() {
1567 uint64_t abstime = mach_absolute_time();
1568 return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1569 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
1570 }
1571
1572 /*ARGSUSED*/
1573 void
1574 init_ast_check(
1575 __unused processor_t processor)
1576 {
1577 }
1578
1579 void
1580 cause_ast_check(
1581 processor_t processor)
1582 {
1583 int cpu = processor->cpu_id;
1584
1585 if (cpu != cpu_number()) {
1586 i386_signal_cpu(cpu, MP_AST, ASYNC);
1587 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1588 }
1589 }
1590
1591 #if MACH_KDB
1592 /*
1593 * invoke kdb on slave processors
1594 */
1595
1596 void
1597 remote_kdb(void)
1598 {
1599 unsigned int my_cpu = cpu_number();
1600 unsigned int cpu;
1601 int kdb_ncpus;
1602 uint64_t tsc_timeout = 0;
1603
1604 mp_kdb_trap = TRUE;
1605 mp_kdb_ncpus = 1;
1606 for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
1607 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1608 continue;
1609 kdb_ncpus++;
1610 i386_signal_cpu(cpu, MP_KDB, ASYNC);
1611 }
1612 DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus);
1613
1614 tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000);
1615
1616 while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) {
1617 /*
1618 * a TLB shootdown request may be pending... this would result in the requesting
1619 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1620 * Process it, so it can now enter mp_kdp_wait()
1621 */
1622 handle_pending_TLB_flushes();
1623
1624 cpu_pause();
1625 }
1626 DBG("mp_kdp_enter() %lu processors done %s\n",
1627 mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out");
1628 }
1629
1630 static void
1631 mp_kdb_wait(void)
1632 {
1633 DBG("mp_kdb_wait()\n");
1634
1635 /* If an I/O port has been specified as a debugging aid, issue a read */
1636 panic_io_port_read();
1637
1638 atomic_incl(&mp_kdb_ncpus, 1);
1639 while (mp_kdb_trap) {
1640 /*
1641 * a TLB shootdown request may be pending... this would result in the requesting
1642 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1643 * Process it, so it can now enter mp_kdp_wait()
1644 */
1645 handle_pending_TLB_flushes();
1646
1647 cpu_pause();
1648 }
1649 atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
1650 DBG("mp_kdb_wait() done\n");
1651 }
1652
1653 /*
1654 * Clear kdb interrupt
1655 */
1656
1657 void
1658 clear_kdb_intr(void)
1659 {
1660 mp_disable_preemption();
1661 i_bit_clear(MP_KDB, &current_cpu_datap()->cpu_signals);
1662 mp_enable_preemption();
1663 }
1664
1665 void
1666 mp_kdb_exit(void)
1667 {
1668 DBG("mp_kdb_exit()\n");
1669 atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
1670 mp_kdb_trap = FALSE;
1671 __asm__ volatile("mfence");
1672
1673 while (mp_kdb_ncpus > 0) {
1674 /*
1675 * a TLB shootdown request may be pending... this would result in the requesting
1676 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1677 * Process it, so it can now enter mp_kdp_wait()
1678 */
1679 handle_pending_TLB_flushes();
1680
1681 cpu_pause();
1682 }
1683
1684 DBG("mp_kdb_exit() done\n");
1685 }
1686
1687 #endif /* MACH_KDB */
1688
1689 void
1690 slave_machine_init(void *param)
1691 {
1692 /*
1693 * Here in process context, but with interrupts disabled.
1694 */
1695 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1696
1697 if (param == FULL_SLAVE_INIT) {
1698 /*
1699 * Cold start
1700 */
1701 clock_init();
1702 cpu_machine_init(); /* Interrupts enabled hereafter */
1703 mp_cpus_call_cpu_init();
1704 }
1705 }
1706
1707 #undef cpu_number
1708 int cpu_number(void)
1709 {
1710 return get_cpu_number();
1711 }
1712
1713 #if MACH_KDB
1714 #include <ddb/db_output.h>
1715
1716 #define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */
1717
1718
1719 #if TRAP_DEBUG
1720 #define MTRAPS 100
1721 struct mp_trap_hist_struct {
1722 unsigned char type;
1723 unsigned char data[5];
1724 } trap_hist[MTRAPS], *cur_trap_hist = trap_hist,
1725 *max_trap_hist = &trap_hist[MTRAPS];
1726
1727 void db_trap_hist(void);
1728
1729 /*
1730 * SPL:
1731 * 1: new spl
1732 * 2: old spl
1733 * 3: new tpr
1734 * 4: old tpr
1735 * INT:
1736 * 1: int vec
1737 * 2: old spl
1738 * 3: new spl
1739 * 4: post eoi tpr
1740 * 5: exit tpr
1741 */
1742
1743 void
1744 db_trap_hist(void)
1745 {
1746 int i,j;
1747 for(i=0;i<MTRAPS;i++)
1748 if (trap_hist[i].type == 1 || trap_hist[i].type == 2) {
1749 db_printf("%s%s",
1750 (&trap_hist[i]>=cur_trap_hist)?"*":" ",
1751 (trap_hist[i].type == 1)?"SPL":"INT");
1752 for(j=0;j<5;j++)
1753 db_printf(" %02x", trap_hist[i].data[j]);
1754 db_printf("\n");
1755 }
1756
1757 }
1758 #endif /* TRAP_DEBUG */
1759 #endif /* MACH_KDB */
1760
1761 static void
1762 cpu_prewarm_init()
1763 {
1764 int i;
1765
1766 simple_lock_init(&cpu_warm_lock, 0);
1767 queue_init(&cpu_warm_call_list);
1768 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
1769 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
1770 }
1771 }
1772
1773 static timer_call_t
1774 grab_warm_timer_call()
1775 {
1776 spl_t x;
1777 timer_call_t call = NULL;
1778
1779 x = splsched();
1780 simple_lock(&cpu_warm_lock);
1781 if (!queue_empty(&cpu_warm_call_list)) {
1782 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
1783 }
1784 simple_unlock(&cpu_warm_lock);
1785 splx(x);
1786
1787 return call;
1788 }
1789
1790 static void
1791 free_warm_timer_call(timer_call_t call)
1792 {
1793 spl_t x;
1794
1795 x = splsched();
1796 simple_lock(&cpu_warm_lock);
1797 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
1798 simple_unlock(&cpu_warm_lock);
1799 splx(x);
1800 }
1801
1802 /*
1803 * Runs in timer call context (interrupts disabled).
1804 */
1805 static void
1806 cpu_warm_timer_call_func(
1807 call_entry_param_t p0,
1808 __unused call_entry_param_t p1)
1809 {
1810 free_warm_timer_call((timer_call_t)p0);
1811 return;
1812 }
1813
1814 /*
1815 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
1816 */
1817 static void
1818 _cpu_warm_setup(
1819 void *arg)
1820 {
1821 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
1822
1823 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL);
1824 cwdp->cwd_result = 0;
1825
1826 return;
1827 }
1828
1829 /*
1830 * Not safe to call with interrupts disabled.
1831 */
1832 kern_return_t
1833 ml_interrupt_prewarm(
1834 uint64_t deadline)
1835 {
1836 struct cpu_warm_data cwd;
1837 timer_call_t call;
1838 cpu_t ct;
1839
1840 if (ml_get_interrupts_enabled() == FALSE) {
1841 panic("%s: Interrupts disabled?\n", __FUNCTION__);
1842 }
1843
1844 /*
1845 * If the platform doesn't need our help, say that we succeeded.
1846 */
1847 if (!ml_get_interrupt_prewake_applicable()) {
1848 return KERN_SUCCESS;
1849 }
1850
1851 /*
1852 * Grab a timer call to use.
1853 */
1854 call = grab_warm_timer_call();
1855 if (call == NULL) {
1856 return KERN_RESOURCE_SHORTAGE;
1857 }
1858
1859 timer_call_setup(call, cpu_warm_timer_call_func, call);
1860 cwd.cwd_call = call;
1861 cwd.cwd_deadline = deadline;
1862 cwd.cwd_result = 0;
1863
1864 /*
1865 * For now, non-local interrupts happen on the master processor.
1866 */
1867 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
1868 if (ct == 0) {
1869 free_warm_timer_call(call);
1870 return KERN_FAILURE;
1871 } else {
1872 return cwd.cwd_result;
1873 }
1874 }
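
/*
 * Illustrative sketch (not part of this file): arming an interrupt prewarm
 * shortly before an anticipated wake.  The one-millisecond lead time is a
 * hypothetical choice, not a value taken from this file.
 */
#if 0	/* example only */
static void
example_prewarm(void)
{
	uint64_t	lead;
	kern_return_t	kr;

	nanoseconds_to_absolutetime(NSEC_PER_MSEC, &lead);
	kr = ml_interrupt_prewarm(mach_absolute_time() + lead);
	if (kr != KERN_SUCCESS)
		kprintf("ml_interrupt_prewarm failed: %d\n", kr);
}
#endif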