/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_ldebug.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/etimer.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <profiling/profile-mk.h>

#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/machine_cpu.h>
#include <i386/mtrr.h>
#include <i386/pmCPU.h>
#include <i386/machine_check.h>
#include <i386/acpi.h>

#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>

#include <sys/kdebug.h>

#include <machine/db_machdep.h>
#include <ddb/db_aout.h>
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
#include <ddb/db_variables.h>
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_expr.h>
#if	MP_DEBUG
#define PAUSE		delay(1000000)
#define DBG(x...)	kprintf(x)
#else
#define DBG(x...)
#define PAUSE
#endif	/* MP_DEBUG */

#define ABS(v)		(((v) > 0)?(v):-(v))
void		slave_boot_init(void);

static void	mp_kdb_wait(void);
volatile boolean_t	mp_kdb_trap = FALSE;
volatile long		mp_kdb_ncpus = 0;

static void	mp_kdp_wait(boolean_t flush, boolean_t isNMI);
static void	mp_rendezvous_action(void);
static void	mp_broadcast_action(void);

static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
static int		cpu_signal_handler(x86_saved_state_t *regs);
static int		NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t		smp_initialized = FALSE;
uint32_t		TSC_sync_margin = 0xFFF;
volatile boolean_t	force_immediate_debugger_NMI = FALSE;
volatile boolean_t	pmap_tlb_flush_timeout = FALSE;
decl_simple_lock_data(,mp_kdp_lock);

decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t	mp_cpu_boot_lock_ext;
/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void	(*mp_rv_setup_func)(void *arg);
static void	(*mp_rv_action_func)(void *arg);
static void	(*mp_rv_teardown_func)(void *arg);
static void	*mp_rv_func_arg;
static volatile int	mp_rv_ncpus;
		/* Cache-aligned barriers: */
static volatile long	mp_rv_entry    __attribute__((aligned(64)));
static volatile long	mp_rv_exit     __attribute__((aligned(64)));
static volatile long	mp_rv_complete __attribute__((aligned(64)));

volatile uint64_t	debugger_entry_time;
volatile uint64_t	debugger_exit_time;

extern int	kdp_snapshot;
static struct _kdp_xcpu_call_func {
	kdp_x86_xcpu_func_t	func;
	void			*arg0, *arg1;
	volatile long		ret;
	volatile uint16_t	cpu;
} kdp_xcpu_call_func = {
	.cpu = KDP_XCPU_NONE
};
/* Variables needed for MP broadcast. */
static void		(*mp_bc_action_func)(void *arg);
static void		*mp_bc_func_arg;
static int		mp_bc_ncpus;
static volatile long	mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t		mp_bc_lock_ext;
static volatile int	debugger_cpu = -1;

static void	mp_cpus_call_action(void);
static void	mp_call_PM(void);

char	mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
/*
 * Initialize dummy structs for profiling. These aren't used but
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT()							\
{									\
	int	i;							\
									\
	/* Hack to initialize pointers to unused profiling structs */	\
	for (i = 1; i < MAX_CPUS; i++)					\
		_profile_vars_cpus[i] = &_profile_vars;			\
}
static lck_grp_t	smp_lck_grp;
static lck_grp_attr_t	smp_lck_grp_attr;

extern void	slave_pstart(void);
void
smp_init(void)
{
	simple_lock_init(&mp_kdp_lock, 0);
	simple_lock_init(&mp_rv_lock, 0);
	lck_grp_attr_setdefault(&smp_lck_grp_attr);
	lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
	lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);

	lapic_set_intr_func(LAPIC_NMI_INTERRUPT,  NMIInterruptHandler);
	lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler);

	DBGLOG_CPU_INIT(master_cpu);

	install_real_mode_bootstrap(slave_pstart);

	if (PE_parse_boot_argn("TSC_sync_margin",
				&TSC_sync_margin, sizeof(TSC_sync_margin)))
		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
	smp_initialized = TRUE;

	return;
}
typedef struct {
	int	target_cpu;
	int	target_lapic;
	int	starter_cpu;
} processor_start_info_t;

static processor_start_info_t	start_info	  __attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long		tsc_entry_barrier __attribute__((aligned(64)));
static volatile long		tsc_exit_barrier  __attribute__((aligned(64)));
static volatile uint64_t	tsc_target	  __attribute__((aligned(64)));
/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
	while (iters-- > 0) {
		if (cpu_datap(slot_num)->cpu_running)
			break;
		delay(usecdelay);
	}
}
/*
 * Quickly bring a CPU back online which has been halted.
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
	kern_return_t	rc;

	/*
	 * Try to perform a fast restart
	 */
	rc = pmCPUExitHalt(slot_num);
	if (rc != KERN_SUCCESS)
		/*
		 * The CPU was not eligible for a fast restart.
		 */
		return(rc);

	/*
	 * Wait until the CPU is back online.
	 */
	mp_disable_preemption();

	/*
	 * We use short pauses (1us) for low latency.  30,000 iterations is
	 * longer than a full restart would require, so it should be more
	 * than long enough.
	 */
	mp_wait_for_cpu_up(slot_num, 30000, 1);
	mp_enable_preemption();

	/*
	 * Check to make sure that the CPU is really running.  If not,
	 * go through the slow path.
	 */
	if (cpu_datap(slot_num)->cpu_running)
		return(KERN_SUCCESS);
	else
		return(KERN_FAILURE);
}
	/* Here on the started cpu with cpu_running set TRUE */

	if (TSC_sync_margin &&
	    start_info.target_cpu == cpu_number()) {
		/*
		 * I've just started-up, synchronize again with the starter cpu
		 * and then snap my TSC.
		 */
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for starter and target at barrier */
		tsc_target = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
	}
static void
start_cpu(void *arg)
{
	int			i = 1000;
	processor_start_info_t	*psip = (processor_start_info_t *) arg;

	/* Ignore this if the current processor is not the starter */
	if (cpu_number() != psip->starter_cpu)
		return;

	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);

	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));

#ifdef	POSTCODE_DELAY
	/* Wait much longer if postcodes are displayed for a delay period. */
	i *= 10000;
#endif
	mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);

	if (TSC_sync_margin &&
	    cpu_datap(psip->target_cpu)->cpu_running) {
		/*
		 * Compare the TSC from the started processor with ours.
		 * Report and log/panic if it diverges by more than
		 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
		 * can be overridden by boot-arg (with 0 meaning no checking).
		 */
		uint64_t	tsc_starter;
		int64_t		tsc_delta;

		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for both processors at barrier */
		tsc_starter = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
		while (tsc_exit_barrier != 0)
			;	/* spin for target to store its TSC */
		tsc_delta = tsc_target - tsc_starter;
		kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
			psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
		if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
			panic(
#else
			printf(
#endif
				"Unsynchronized TSC for cpu %d: "
					"0x%016llx, delta 0x%llx\n",
				psip->target_cpu, tsc_target, tsc_delta);
		}
	}
}
extern char	prot_mode_gdt[];
extern char	slave_boot_base[];
extern char	real_mode_bootstrap_base[];
extern char	real_mode_bootstrap_end[];
extern char	slave_boot_end[];
kern_return_t
intel_startCPU(
	int	slot_num)
{
	int		lapic = cpu_to_lapic[slot_num];
	boolean_t	istate;

	DBGLOG_CPU_INIT(slot_num);

	DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD);

	/*
	 * Initialize (or re-initialize) the descriptor tables for this cpu.
	 * Propagate processor mode to slave.
	 */
	if (cpu_mode_is64bit())
		cpu_desc_init64(cpu_datap(slot_num));
	else
		cpu_desc_init(cpu_datap(slot_num));

	/* Serialize use of the slave boot stack, etc. */
	lck_mtx_lock(&mp_cpu_boot_lock);

	istate = ml_set_interrupts_enabled(FALSE);
	if (slot_num == get_cpu_number()) {
		ml_set_interrupts_enabled(istate);
		lck_mtx_unlock(&mp_cpu_boot_lock);
		return KERN_SUCCESS;
	}

	start_info.starter_cpu  = cpu_number();
	start_info.target_cpu   = slot_num;
	start_info.target_lapic = lapic;
	tsc_entry_barrier = 2;
	tsc_exit_barrier  = 2;

	/*
	 * Perform the processor startup sequence with all running
	 * processors rendezvous'ed. This is required during periods when
	 * the cache-disable bit is set for MTRR/PAT initialization.
	 */
	mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

	start_info.target_cpu = 0;

	ml_set_interrupts_enabled(istate);
	lck_mtx_unlock(&mp_cpu_boot_lock);

	if (!cpu_datap(slot_num)->cpu_running) {
		kprintf("Failed to start CPU %02d\n", slot_num);
		printf("Failed to start CPU %02d, rebooting...\n", slot_num);
		return KERN_SUCCESS;
	} else {
		kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
		return KERN_SUCCESS;
	}
}
#if	MP_DEBUG
cpu_signal_event_log_t	*cpu_signal[MAX_CPUS];
cpu_signal_event_log_t	*cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif	/* MP_DEBUG */
int
cpu_signal_handler(x86_saved_state_t *regs)
{
	int		my_cpu;
	volatile int	*my_word;
#if	MACH_KDB && MACH_ASSERT
	int		i = 100;
#endif	/* MACH_KDB && MACH_ASSERT */

	mp_disable_preemption();

	my_cpu = cpu_number();
	my_word = &current_cpu_datap()->cpu_signals;

	do {
#if	MACH_KDB && MACH_ASSERT
		if (i-- <= 0)
			Debugger("cpu_signal_handler: signals did not clear");
#endif	/* MACH_KDB && MACH_ASSERT */
#if	MACH_KDP
		if (i_bit(MP_KDP, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_KDP);
			i_bit_clear(MP_KDP, my_word);
/* Ensure that the i386_kernel_state at the base of the
 * current thread's stack (if any) is synchronized with the
 * context at the moment of the interrupt, to facilitate
 * access through the debugger.
 */
			sync_iss_to_iks(regs);
			if (pmsafe_debug && !kdp_snapshot)
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
			mp_kdp_wait(TRUE, FALSE);
			if (pmsafe_debug && !kdp_snapshot)
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
		} else
#endif	/* MACH_KDP */
		if (i_bit(MP_TLB_FLUSH, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
			i_bit_clear(MP_TLB_FLUSH, my_word);
			pmap_update_interrupt();
		} else if (i_bit(MP_AST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_AST);
			i_bit_clear(MP_AST, my_word);
			ast_check(cpu_to_processor(my_cpu));
#if	MACH_KDB
		} else if (i_bit(MP_KDB, my_word)) {

			i_bit_clear(MP_KDB, my_word);
			current_cpu_datap()->cpu_kdb_is_slave++;
			mp_kdb_wait();
			current_cpu_datap()->cpu_kdb_is_slave--;
#endif	/* MACH_KDB */
		} else if (i_bit(MP_RENDEZVOUS, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
			i_bit_clear(MP_RENDEZVOUS, my_word);
			mp_rendezvous_action();
		} else if (i_bit(MP_BROADCAST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
			i_bit_clear(MP_BROADCAST, my_word);
			mp_broadcast_action();
		} else if (i_bit(MP_CHUD, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CHUD);
			i_bit_clear(MP_CHUD, my_word);
			chudxnu_cpu_signal_handler();
		} else if (i_bit(MP_CALL, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL);
			i_bit_clear(MP_CALL, my_word);
			mp_cpus_call_action();
		} else if (i_bit(MP_CALL_PM, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
			i_bit_clear(MP_CALL_PM, my_word);
			mp_call_PM();
		}
	} while (*my_word);

	mp_enable_preemption();

	return 0;
}
int
NMIInterruptHandler(x86_saved_state_t *regs)
{
	void	*stackptr;

	sync_iss_to_iks_unconditionally(regs);
#if defined (__i386__)
	__asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
#elif defined (__x86_64__)
	__asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
#endif

	if (cpu_number() == debugger_cpu)
		goto NMExit;

	if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) {
		char pstr[128];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number());
		panic_i386_backtrace(stackptr, 16, &pstr[0], TRUE, regs);
	}

	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
	mp_kdp_wait(FALSE, pmap_tlb_flush_timeout);
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
NMExit:
	return 1;
}
#ifdef	MP_DEBUG
int	max_lock_loops = 100000000;
int	trappedalready = 0;	/* (BRINGUP) */
#endif	/* MP_DEBUG */
void
i386_cpu_IPI(int cpu)
{
	boolean_t	state;

#ifdef	MP_DEBUG
	if(cpu_datap(cpu)->cpu_signals & 6) {	/* (BRINGUP) */
		kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu);
	}
#endif	/* MP_DEBUG */

#ifdef	MP_DEBUG
	if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) {	/* (BRINGUP) */
		if(kdb_cpu != cpu_number()) {
			trappedalready = 1;
			panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n",
				cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu);
		}
	}
#endif	/* MP_DEBUG */

	/* Wait for previous interrupt to be delivered... */
#ifdef	MP_DEBUG
	int	pending_busy_count = 0;
	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
		if (++pending_busy_count > max_lock_loops)
			panic("i386_cpu_IPI() deadlock\n");
#else
	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
#endif	/* MP_DEBUG */
		cpu_pause();
	}

	state = ml_set_interrupts_enabled(FALSE);
	LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED);
	(void) ml_set_interrupts_enabled(state);
}
/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI.  If an
 * IPI is always needed then use i386_cpu_IPI.
 */
void
cpu_interrupt(int cpu)
{
	if (smp_initialized
	    && pmCPUExitIdle(cpu_datap(cpu))) {
		i386_cpu_IPI(cpu);
	}
}
/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
void
cpu_NMI_interrupt(int cpu)
{
	boolean_t	state;

	if (smp_initialized) {
		state = ml_set_interrupts_enabled(FALSE);
		/* Program the interrupt command register */
		LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
		/* The vector is ignored in this case--the target CPU will enter on the
		 * NMI vector.
		 */
		LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
		(void) ml_set_interrupts_enabled(state);
	}
}
static void (* volatile mp_PM_func)(void) = NULL;

static void
mp_call_PM(void)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL)
		mp_PM_func();
}

void
cpu_PM_interrupt(int cpu)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL) {
		if (cpu == cpu_number())
			mp_PM_func();
		else
			i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
	}
}

void
PM_interrupt_register(void (*fn)(void))
{
	mp_PM_func = fn;
}
void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	uint64_t	tsc_timeout;

	if (!cpu_datap(cpu)->cpu_running)
		return;

	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0);

	DBGLOG(cpu_signal, cpu, event);

	i_bit_set(event, signals);
	i386_cpu_IPI(cpu);
	if (mode == SYNC) {
	   again:
		tsc_timeout = rdtsc64() + (1000*1000*1000);
		while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
			cpu_pause();
		}
		if (i_bit(event, signals)) {
			DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
				cpu, event);
			goto again;
		}
	}
	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0);
}
/*
 * Send event to all running cpus.
 * Called with the topology locked.
 */
void
i386_signal_cpus(mp_event_t event, mp_sync_t mode)
{
	unsigned int	cpu;
	unsigned int	my_cpu = cpu_number();

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		i386_signal_cpu(cpu, event, mode);
	}
}

/*
 * Return the number of running cpus.
 * Called with the topology locked.
 */
int
i386_active_cpus(void)
{
	unsigned int	cpu;
	unsigned int	ncpus = 0;

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu_datap(cpu)->cpu_running)
			ncpus++;
	}
	return(ncpus);
}
/*
 * All-CPU rendezvous:
 *	- CPUs are signalled,
 *	- all execute the setup function (if specified),
 *	- rendezvous (i.e. all cpus reach a barrier),
 *	- all execute the action function (if specified),
 *	- rendezvous again,
 *	- execute the teardown function (if specified), and then
 *	- resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */

static void
mp_rendezvous_action(void)
{
	boolean_t	intrs_enabled;

	/* setup function */
	if (mp_rv_setup_func != NULL)
		mp_rv_setup_func(mp_rv_func_arg);

	intrs_enabled = ml_get_interrupts_enabled();

	/* spin on entry rendezvous */
	atomic_incl(&mp_rv_entry, 1);
	while (mp_rv_entry < mp_rv_ncpus) {
		/* poll for pesky tlb flushes if interrupts disabled */
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		cpu_pause();
	}

	/* action function */
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);

	/* spin on exit rendezvous */
	atomic_incl(&mp_rv_exit, 1);
	while (mp_rv_exit < mp_rv_ncpus) {
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		cpu_pause();
	}

	/* teardown function */
	if (mp_rv_teardown_func != NULL)
		mp_rv_teardown_func(mp_rv_func_arg);

	/* Bump completion count */
	atomic_incl(&mp_rv_complete, 1);
}
void
mp_rendezvous(void (*setup_func)(void *),
	      void (*action_func)(void *),
	      void (*teardown_func)(void *),
	      void *arg)
{
	if (!smp_initialized) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* set static function pointers */
	mp_rv_setup_func = setup_func;
	mp_rv_action_func = action_func;
	mp_rv_teardown_func = teardown_func;
	mp_rv_func_arg = arg;

	mp_rv_entry    = 0;
	mp_rv_exit     = 0;
	mp_rv_complete = 0;

	/*
	 * signal other processors, which will call mp_rendezvous_action()
	 * with interrupts disabled
	 */
	simple_lock(&x86_topo_lock);
	mp_rv_ncpus = i386_active_cpus();
	i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
	simple_unlock(&x86_topo_lock);

	/* call executor function on this cpu */
	mp_rendezvous_action();

	/*
	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
	 */
	while (mp_rv_complete < mp_rv_ncpus) {
		cpu_pause();
	}

	/* Tidy up */
	mp_rv_setup_func    = NULL;
	mp_rv_action_func   = NULL;
	mp_rv_teardown_func = NULL;
	mp_rv_func_arg      = NULL;

	/* release lock */
	simple_unlock(&mp_rv_lock);
}
void
mp_rendezvous_break_lock(void)
{
	simple_lock_init(&mp_rv_lock, 0);
}
static void
setup_disable_intrs(__unused void * param_not_used)
{
	/* disable interrupts before the first barrier */
	boolean_t intr = ml_set_interrupts_enabled(FALSE);

	current_cpu_datap()->cpu_iflag = intr;
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

static void
teardown_restore_intrs(__unused void * param_not_used)
{
	/* restore interrupt flag following MTRR changes */
	ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}
/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
void
mp_rendezvous_no_intrs(
	      void (*action_func)(void *),
	      void *arg)
{
	mp_rendezvous(setup_disable_intrs,
		      action_func,
		      teardown_restore_intrs,
		      arg);
}
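/*
 * Usage sketch (illustrative, not from the original file): a kext that
 * needs its routine run on every cpu with interrupts disabled can call
 * the wrapper directly; "my_percpu_update" and "my_state" are hypothetical.
 *
 *	mp_rendezvous_no_intrs(my_percpu_update, (void *) &my_state);
 *
 * This is equivalent to mp_rendezvous() with setup_disable_intrs and
 * teardown_restore_intrs supplied as the setup and teardown functions.
 */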
void
handle_pending_TLB_flushes(void)
{
	volatile int	*my_word = &current_cpu_datap()->cpu_signals;

	if (i_bit(MP_TLB_FLUSH, my_word)) {
		DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH);
		i_bit_clear(MP_TLB_FLUSH, my_word);
		pmap_update_interrupt();
	}
}
/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 */
static void
mp_cpus_call_action(void)
{
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);
	atomic_incl(&mp_rv_complete, 1);
}
/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * If the mode is SYNC, the function is called serially on the target cpus
 * in logical cpu order. If the mode is ASYNC, the function is called in
 * parallel over the specified cpus.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * Return does not occur until the function has completed on all cpus.
 * The return value is the number of cpus on which the function was called.
 */
cpu_t
mp_cpus_call(
	cpumask_t	cpus,
	mp_sync_t	mode,
	void		(*action_func)(void *),
	void		*arg)
{
	cpu_t		cpu;
	boolean_t	intrs_enabled = ml_get_interrupts_enabled();
	boolean_t	call_self = FALSE;

	if (!smp_initialized) {
		if ((cpus & CPUMASK_SELF) == 0)
			return 0;
		if (action_func != NULL) {
			(void) ml_set_interrupts_enabled(FALSE);
			action_func(arg);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		return 1;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* Use the rendezvous data structures for this call */
	mp_rv_action_func = action_func;
	mp_rv_func_arg = arg;
	mp_rv_ncpus = 0;
	mp_rv_complete = 0;

	simple_lock(&x86_topo_lock);
	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
		    !cpu_datap(cpu)->cpu_running)
			continue;
		if (cpu == (cpu_t) cpu_number()) {
			/*
			 * We don't IPI ourself and if calling asynchronously,
			 * we defer our call until we have signalled all others.
			 */
			call_self = TRUE;
			if (mode == SYNC && action_func != NULL) {
				(void) ml_set_interrupts_enabled(FALSE);
				action_func(arg);
				ml_set_interrupts_enabled(intrs_enabled);
			}
		} else {
			/*
			 * Bump count of other cpus called and signal this cpu.
			 * Note: we signal asynchronously regardless of mode
			 * because we wait on mp_rv_complete either here
			 * (if mode == SYNC) or later (if mode == ASYNC).
			 * While spinning, poll for TLB flushes if interrupts
			 * are disabled.
			 */
			mp_rv_ncpus++;
			i386_signal_cpu(cpu, MP_CALL, ASYNC);
			if (mode == SYNC) {
				simple_unlock(&x86_topo_lock);
				while (mp_rv_complete < mp_rv_ncpus) {
					if (!intrs_enabled)
						handle_pending_TLB_flushes();
					cpu_pause();
				}
				simple_lock(&x86_topo_lock);
			}
		}
	}
	simple_unlock(&x86_topo_lock);

	/*
	 * If calls are being made asynchronously,
	 * make the local call now if needed, and then
	 * wait for all other cpus to finish their calls.
	 */
	if (mode == ASYNC) {
		if (call_self && action_func != NULL) {
			(void) ml_set_interrupts_enabled(FALSE);
			action_func(arg);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		while (mp_rv_complete < mp_rv_ncpus) {
			if (!intrs_enabled)
				handle_pending_TLB_flushes();
			cpu_pause();
		}
	}

	/* Determine the number of cpus called */
	cpu = mp_rv_ncpus + (call_self ? 1 : 0);

	simple_unlock(&mp_rv_lock);

	return cpu;
}
static void
mp_broadcast_action(void)
{
	/* call action function */
	if (mp_bc_action_func != NULL)
		mp_bc_action_func(mp_bc_func_arg);

	/* if we're the last one through, wake up the instigator */
	if (atomic_decl_and_test(&mp_bc_count, 1))
		thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
}
/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
void
mp_broadcast(
	 void (*action_func)(void *),
	 void *arg)
{
	if (!smp_initialized) {
		if (action_func != NULL)
			action_func(arg);
		return;
	}

	/* obtain broadcast lock */
	lck_mtx_lock(&mp_bc_lock);

	/* set static function pointers */
	mp_bc_action_func = action_func;
	mp_bc_func_arg = arg;

	assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

	/*
	 * signal other processors, which will call mp_broadcast_action()
	 */
	simple_lock(&x86_topo_lock);
	mp_bc_ncpus = i386_active_cpus();	/* total including this cpu */
	mp_bc_count = mp_bc_ncpus;
	i386_signal_cpus(MP_BROADCAST, ASYNC);

	/* call executor function on this cpu */
	mp_broadcast_action();
	simple_unlock(&x86_topo_lock);

	/* block for all cpus to have run action_func */
	if (mp_bc_ncpus > 1)
		thread_block(THREAD_CONTINUE_NULL);
	else
		clear_wait(current_thread(), THREAD_AWAKENED);

	/* release lock */
	lck_mtx_unlock(&mp_bc_lock);
}
void
i386_activate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	if (!smp_initialized) {
		cdp->cpu_running = TRUE;
		return;
	}

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = TRUE;
	simple_unlock(&x86_topo_lock);
}
extern void etimer_timer_expire(void *arg);

void
i386_deactivate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = FALSE;
	simple_unlock(&x86_topo_lock);

	timer_queue_shutdown(&cdp->rtclock_timer.queue);
	cdp->rtclock_timer.deadline = EndOfAllTime;
	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);

	/*
	 * In case a rendezvous/broadcast/call was initiated to this cpu
	 * before we cleared cpu_running, we must perform any actions due.
	 */
	if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
		mp_rendezvous_action();
	if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
		mp_broadcast_action();
	if (i_bit(MP_CALL, &cdp->cpu_signals))
		mp_cpus_call_action();
	cdp->cpu_signals = 0;			/* all clear */
}
int	pmsafe_debug	= 1;

#if	MACH_KDP
volatile boolean_t	mp_kdp_trap = FALSE;
volatile unsigned long	mp_kdp_ncpus;
boolean_t		mp_kdp_state;
void
mp_kdp_enter(void)
{
	unsigned int	cpu;
	unsigned int	ncpus;
	unsigned int	my_cpu;
	uint64_t	tsc_timeout;

	DBG("mp_kdp_enter()\n");

	/*
	 * Here to enter the debugger.
	 * In case of races, only one cpu is allowed to enter kdp after
	 * stopping others.
	 */
	mp_kdp_state = ml_set_interrupts_enabled(FALSE);
	simple_lock(&mp_kdp_lock);
	debugger_entry_time = mach_absolute_time();
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

	while (mp_kdp_trap) {
		simple_unlock(&mp_kdp_lock);
		DBG("mp_kdp_enter() race lost\n");
		mp_kdp_wait(TRUE, FALSE);
		simple_lock(&mp_kdp_lock);
	}
	my_cpu = cpu_number();
	debugger_cpu = my_cpu;
	mp_kdp_ncpus = 1;	/* self */
	mp_kdp_trap = TRUE;
	simple_unlock(&mp_kdp_lock);

	/*
	 * Deliver a nudge to other cpus, counting how many
	 */
	DBG("mp_kdp_enter() signaling other processors\n");
	if (force_immediate_debugger_NMI == FALSE) {
		for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			ncpus++;
			i386_signal_cpu(cpu, MP_KDP, ASYNC);
		}
		/*
		 * Wait for other processors to synchronize.
		 */
		DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

		/*
		 * This timeout is rather arbitrary; we don't want to NMI
		 * processors that are executing at potentially
		 * "unsafe-to-interrupt" points such as the trampolines,
		 * but neither do we want to lose state by waiting too long.
		 */
		tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000);

		while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
			/*
			 * A TLB shootdown request may be pending--this would
			 * result in the requesting processor waiting in
			 * PMAP_UPDATE_TLBS() until this processor deals with it.
			 * Process it, so it can now enter mp_kdp_wait()
			 */
			handle_pending_TLB_flushes();
			cpu_pause();
		}
		/* If we've timed out, and some processor(s) are still unresponsive,
		 * interrupt them with an NMI via the local APIC.
		 */
		if (mp_kdp_ncpus != ncpus) {
			for (cpu = 0; cpu < real_ncpus; cpu++) {
				if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
					continue;
				if (cpu_signal_pending(cpu, MP_KDP))
					cpu_NMI_interrupt(cpu);
			}
		}
	} else {
		for (cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			cpu_NMI_interrupt(cpu);
		}
	}

	DBG("mp_kdp_enter() %u processors done %s\n",
		mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");

	postcode(MP_KDP_ENTER);
}
static boolean_t
cpu_signal_pending(int cpu, mp_event_t event)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	boolean_t	retval = FALSE;

	if (i_bit(event, signals))
		retval = TRUE;
	return retval;
}
long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
			 void *arg0, void *arg1)
{
	if (lcpu > (real_ncpus - 1))
		return -1;

	kdp_xcpu_call_func.func = func;
	kdp_xcpu_call_func.ret  = -1;
	kdp_xcpu_call_func.arg0 = arg0;
	kdp_xcpu_call_func.arg1 = arg1;
	kdp_xcpu_call_func.cpu  = lcpu;
	DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
	while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
		cpu_pause();
	return kdp_xcpu_call_func.ret;
}
static void
kdp_x86_xcpu_poll(void)
{
	if ((uint16_t) cpu_number() == kdp_xcpu_call_func.cpu) {
		kdp_xcpu_call_func.ret =
		    kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
					    kdp_xcpu_call_func.arg1,
					    cpu_number());
		kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
	}
}
static void
mp_kdp_wait(boolean_t flush, boolean_t isNMI)
{
	DBG("mp_kdp_wait()\n");
	/* If an I/O port has been specified as a debugging aid, issue a read */
	panic_io_port_read();

#if CONFIG_MCA
	/* If we've trapped due to a machine-check, save MCA registers */
	mca_check_save();
#endif

	atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
	while (mp_kdp_trap || (isNMI == TRUE)) {
		/*
		 * A TLB shootdown request may be pending--this would result
		 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
		 * until this processor handles it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		if (flush)
			handle_pending_TLB_flushes();

		kdp_x86_xcpu_poll();
		cpu_pause();
	}

	atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
	DBG("mp_kdp_wait() done\n");
}
1313 DBG("mp_kdp_exit()\n");
1315 atomic_decl((volatile long *)&mp_kdp_ncpus
, 1);
1317 debugger_exit_time
= mach_absolute_time();
1319 mp_kdp_trap
= FALSE
;
1320 __asm__
volatile("mfence");
1322 /* Wait other processors to stop spinning. XXX needs timeout */
1323 DBG("mp_kdp_exit() waiting for processors to resume\n");
1324 while (mp_kdp_ncpus
> 0) {
1326 * a TLB shootdown request may be pending... this would result in the requesting
1327 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1328 * Process it, so it can now enter mp_kdp_wait()
1330 handle_pending_TLB_flushes();
1335 if (pmsafe_debug
&& !kdp_snapshot
)
1336 pmSafeMode(¤t_cpu_datap()->lcpu
, PM_SAFE_FL_NORMAL
);
1338 DBG("mp_kdp_exit() done\n");
1339 (void) ml_set_interrupts_enabled(mp_kdp_state
);
1342 #endif /* MACH_KDP */
boolean_t
mp_recent_debugger_activity() {
	return (((mach_absolute_time() - debugger_entry_time) < LastDebuggerEntryAllowance) ||
	    ((mach_absolute_time() - debugger_exit_time) < LastDebuggerEntryAllowance));
}
void
init_ast_check(
	__unused processor_t	processor)
{
}

void
cause_ast_check(
	processor_t	processor)
{
	int	cpu = processor->cpu_id;

	if (cpu != cpu_number()) {
		i386_signal_cpu(cpu, MP_AST, ASYNC);
	}
}
#if	MACH_KDB

/*
 * invoke kdb on slave processors
 */
void
remote_kdb(void)
{
	unsigned int	my_cpu = cpu_number();
	unsigned int	cpu;
	int		kdb_ncpus;
	uint64_t	tsc_timeout = 0;

	mp_kdb_trap = TRUE;
	mp_kdb_ncpus = 1;
	for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		kdb_ncpus++;
		i386_signal_cpu(cpu, MP_KDB, ASYNC);
	}
	DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus);

	tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000);

	while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) {
		/*
		 * a TLB shootdown request may be pending... this would result in the requesting
		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		handle_pending_TLB_flushes();

		cpu_pause();
	}
	DBG("mp_kdp_enter() %d processors done %s\n",
		mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out");
}
1410 DBG("mp_kdb_wait()\n");
1412 /* If an I/O port has been specified as a debugging aid, issue a read */
1413 panic_io_port_read();
1415 atomic_incl(&mp_kdb_ncpus
, 1);
1416 while (mp_kdb_trap
) {
1418 * a TLB shootdown request may be pending... this would result in the requesting
1419 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1420 * Process it, so it can now enter mp_kdp_wait()
1422 handle_pending_TLB_flushes();
1426 atomic_decl((volatile long *)&mp_kdb_ncpus
, 1);
1427 DBG("mp_kdb_wait() done\n");
/*
 * Clear kdb interrupt
 */
void
clear_kdb_intr(void)
{
	mp_disable_preemption();
	i_bit_clear(MP_KDB, &current_cpu_datap()->cpu_signals);
	mp_enable_preemption();
}
1445 DBG("mp_kdb_exit()\n");
1446 atomic_decl((volatile long *)&mp_kdb_ncpus
, 1);
1447 mp_kdb_trap
= FALSE
;
1448 __asm__
volatile("mfence");
1450 while (mp_kdb_ncpus
> 0) {
1452 * a TLB shootdown request may be pending... this would result in the requesting
1453 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1454 * Process it, so it can now enter mp_kdp_wait()
1456 handle_pending_TLB_flushes();
1461 DBG("mp_kdb_exit() done\n");
1464 #endif /* MACH_KDB */
void
slave_machine_init(void *param)
{
	/*
	 * Here in process context, but with interrupts disabled.
	 */
	DBG("slave_machine_init() CPU%d\n", get_cpu_number());

	if (param == FULL_SLAVE_INIT) {
		cpu_machine_init();	/* Interrupts enabled hereafter */
	}
}
int cpu_number(void)
{
	return get_cpu_number();
}
#if	MACH_KDB

#include <ddb/db_output.h>

#define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */

#if	TRAP_DEBUG
struct mp_trap_hist_struct {
	unsigned char type;
	unsigned char data[5];
} trap_hist[MTRAPS], *cur_trap_hist = trap_hist,
    *max_trap_hist = &trap_hist[MTRAPS];

void db_trap_hist(void);

void
db_trap_hist(void)
{
	int i, j;

	for(i=0;i<MTRAPS;i++)
		if (trap_hist[i].type == 1 || trap_hist[i].type == 2) {
			db_printf("%s%s",
				  (&trap_hist[i]>=cur_trap_hist)?"*":" ",
				  (trap_hist[i].type == 1)?"SPL":"INT");
			for(j=0;j<5;j++)
				db_printf(" %02x", trap_hist[i].data[j]);
			db_printf("\n");
		}
}
#endif	/* TRAP_DEBUG */
#endif	/* MACH_KDB */