/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_ldebug.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/etimer.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <profiling/profile-mk.h>

#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/machine_cpu.h>
#include <i386/mtrr.h>
#include <i386/pmCPU.h>
#include <i386/machine_check.h>
#include <i386/acpi.h>

#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>

#include <sys/kdebug.h>

#include <machine/db_machdep.h>
#include <ddb/db_aout.h>
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
#include <ddb/db_variables.h>
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_expr.h>
#if	MP_DEBUG
#define PAUSE		delay(1000000)
#define DBG(x...)	kprintf(x)
#else
#define DBG(x...)
#define PAUSE
#endif	/* MP_DEBUG */

#define ABS(v)		(((v) > 0)?(v):-(v))
void		slave_boot_init(void);

static void	mp_kdb_wait(void);
volatile boolean_t	mp_kdb_trap  = FALSE;
volatile long	mp_kdb_ncpus = 0;

static void	mp_kdp_wait(boolean_t flush, boolean_t isNMI);
static void	mp_rendezvous_action(void);
static void	mp_broadcast_action(void);

static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
static int		cpu_signal_handler(x86_saved_state_t *regs);
static int		NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t		smp_initialized = FALSE;
uint32_t		TSC_sync_margin = 0xFFF;
volatile boolean_t	force_immediate_debugger_NMI = FALSE;
volatile boolean_t	pmap_tlb_flush_timeout = FALSE;
decl_simple_lock_data(,mp_kdp_lock);

decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t	mp_cpu_boot_lock_ext;
/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void	(*mp_rv_setup_func)(void *arg);
static void	(*mp_rv_action_func)(void *arg);
static void	(*mp_rv_teardown_func)(void *arg);
static void	*mp_rv_func_arg;
static volatile int	mp_rv_ncpus;
		/* Cache-aligned barriers: */
static volatile long	mp_rv_entry    __attribute__((aligned(64)));
static volatile long	mp_rv_exit     __attribute__((aligned(64)));
static volatile long	mp_rv_complete __attribute__((aligned(64)));

volatile uint64_t	debugger_entry_time;
volatile uint64_t	debugger_exit_time;
static struct _kdp_xcpu_call_func {
	kdp_x86_xcpu_func_t	func;
	void			*arg0, *arg1;
	volatile long		ret;
	volatile uint16_t	cpu;
} kdp_xcpu_call_func = {
	.cpu  = KDP_XCPU_NONE
};
/* Variables needed for MP broadcast. */
static void	(*mp_bc_action_func)(void *arg);
static void	*mp_bc_func_arg;
static int	mp_bc_ncpus;
static volatile long	mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t	mp_bc_lock_ext;
static volatile int	debugger_cpu = -1;

static void	mp_cpus_call_action(void);
static void	mp_call_PM(void);

char		mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));	// Temp stack for slave init
#if	GPROF
/*
 * Initialize dummy structs for profiling. These aren't used but
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT()							\
{									\
	int	i;							\
									\
	/* Hack to initialize pointers to unused profiling structs */	\
	for (i = 1; i < MAX_CPUS; i++)					\
		_profile_vars_cpus[i] = &_profile_vars;			\
}
#else
#define GPROF_INIT()
#endif /* GPROF */
static lck_grp_t	smp_lck_grp;
static lck_grp_attr_t	smp_lck_grp_attr;

extern void	slave_pstart(void);
void
smp_init(void)
{
	simple_lock_init(&mp_kdp_lock, 0);
	simple_lock_init(&mp_rv_lock, 0);
	lck_grp_attr_setdefault(&smp_lck_grp_attr);
	lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
	lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);

	lapic_set_intr_func(LAPIC_NMI_INTERRUPT,  NMIInterruptHandler);
	lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler);

	DBGLOG_CPU_INIT(master_cpu);

	install_real_mode_bootstrap(slave_pstart);

	if (PE_parse_boot_argn("TSC_sync_margin",
				&TSC_sync_margin, sizeof(TSC_sync_margin)))
		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
	smp_initialized = TRUE;

	return;
}
typedef	struct {
	int	target_cpu;
	int	target_lapic;
	int	starter_cpu;
} processor_start_info_t;
static processor_start_info_t	start_info	__attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long		tsc_entry_barrier __attribute__((aligned(64)));
static volatile long		tsc_exit_barrier  __attribute__((aligned(64)));
static volatile uint64_t	tsc_target	  __attribute__((aligned(64)));
/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
	while (iters-- > 0) {
		if (cpu_datap(slot_num)->cpu_running)
			break;
		delay(usecdelay);
	}
}
/*
 * Quickly bring a CPU back online which has been halted.
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
	kern_return_t	rc;

	/*
	 * Try to perform a fast restart
	 */
	rc = pmCPUExitHalt(slot_num);
	if (rc != KERN_SUCCESS)
		/*
		 * The CPU was not eligible for a fast restart.
		 */
		return(rc);

	/*
	 * Wait until the CPU is back online.
	 */
	mp_disable_preemption();

	/*
	 * We use short pauses (1us) for low latency.  30,000 iterations is
	 * longer than a full restart would require so it should be more
	 * than long enough.
	 */
	mp_wait_for_cpu_up(slot_num, 30000, 1);
	mp_enable_preemption();

	/*
	 * Check to make sure that the CPU is really running.  If not,
	 * go through the slow path.
	 */
	if (cpu_datap(slot_num)->cpu_running)
		return(KERN_SUCCESS);
	else
		return(KERN_FAILURE);
}
static void
started_cpu(void)
{
	/* Here on the started cpu with cpu_running set TRUE */

	if (TSC_sync_margin &&
	    start_info.target_cpu == cpu_number()) {
		/*
		 * I've just started-up, synchronize again with the starter cpu
		 * and then snap my TSC.
		 */
		tsc_target   = 0;
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for starter and target at barrier */
		tsc_target = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
	}
}
static void
start_cpu(void *arg)
{
	int			i = 1000;
	processor_start_info_t	*psip = (processor_start_info_t *) arg;

	/* Ignore this if the current processor is not the starter */
	if (cpu_number() != psip->starter_cpu)
		return;

	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
	delay(100);

	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));

#ifdef	POSTCODE_DELAY
	/* Wait much longer if postcodes are displayed for a delay period. */
	i *= 10000;
#endif
	mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
	if (TSC_sync_margin &&
	    cpu_datap(psip->target_cpu)->cpu_running) {
		/*
		 * Compare the TSC from the started processor with ours.
		 * Report and log/panic if it diverges by more than
		 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
		 * can be overridden by boot-arg (with 0 meaning no checking).
		 */
		uint64_t	tsc_starter;
		int64_t		tsc_delta;
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for both processors at barrier */
		tsc_starter = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
		while (tsc_exit_barrier != 0)
			;	/* spin for target to store its TSC */
		tsc_delta = tsc_target - tsc_starter;
		kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
			psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
		if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
			panic(
#else
			printf(
#endif
				"Unsynchronized TSC for cpu %d: "
				"0x%016llx, delta 0x%llx\n",
				psip->target_cpu, tsc_target, tsc_delta);
		}
	}
}
extern char	prot_mode_gdt[];
extern char	slave_boot_base[];
extern char	real_mode_bootstrap_base[];
extern char	real_mode_bootstrap_end[];
extern char	slave_boot_end[];
kern_return_t
intel_startCPU(
	int	slot_num)
{
	int		lapic = cpu_to_lapic[slot_num];
	boolean_t	istate;

	assert(lapic != -1);

	DBGLOG_CPU_INIT(slot_num);

	DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD);

	/*
	 * Initialize (or re-initialize) the descriptor tables for this cpu.
	 * Propagate processor mode to slave.
	 */
	if (cpu_mode_is64bit())
		cpu_desc_init64(cpu_datap(slot_num));
	else
		cpu_desc_init(cpu_datap(slot_num));

	/* Serialize use of the slave boot stack, etc. */
	lck_mtx_lock(&mp_cpu_boot_lock);

	istate = ml_set_interrupts_enabled(FALSE);
	if (slot_num == get_cpu_number()) {
		ml_set_interrupts_enabled(istate);
		lck_mtx_unlock(&mp_cpu_boot_lock);
		return KERN_SUCCESS;
	}

	start_info.starter_cpu  = cpu_number();
	start_info.target_cpu   = slot_num;
	start_info.target_lapic = lapic;
	tsc_entry_barrier = 2;
	tsc_exit_barrier = 2;

	/*
	 * Perform the processor startup sequence with all running
	 * processors rendezvous'ed. This is required during periods when
	 * the cache-disable bit is set for MTRR/PAT initialization.
	 */
	mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

	start_info.target_cpu = 0;

	ml_set_interrupts_enabled(istate);
	lck_mtx_unlock(&mp_cpu_boot_lock);

	if (!cpu_datap(slot_num)->cpu_running) {
		kprintf("Failed to start CPU %02d\n", slot_num);
		printf("Failed to start CPU %02d, rebooting...\n", slot_num);
		delay(1000000);
		halt_cpu();
		return KERN_SUCCESS;
	} else {
		kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
		return KERN_SUCCESS;
	}
}
#if	MP_DEBUG
cpu_signal_event_log_t	*cpu_signal[MAX_CPUS];
cpu_signal_event_log_t	*cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif	/* MP_DEBUG */
int
cpu_signal_handler(x86_saved_state_t *regs)
{
	int		my_cpu;
	volatile int	*my_word;
#if	MACH_KDB && MACH_ASSERT
	int		i=100;
#endif	/* MACH_KDB && MACH_ASSERT */

	mp_disable_preemption();

	my_cpu = cpu_number();
	my_word = &current_cpu_datap()->cpu_signals;

	do {
#if	MACH_KDB && MACH_ASSERT
		if (i-- <= 0)
		    Debugger("cpu_signal_handler: signals did not clear");
#endif	/* MACH_KDB && MACH_ASSERT */
#if	MACH_KDP
		if (i_bit(MP_KDP, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_KDP);
			i_bit_clear(MP_KDP, my_word);
/* Ensure that the i386_kernel_state at the base of the
 * current thread's stack (if any) is synchronized with the
 * context at the moment of the interrupt, to facilitate
 * access through the debugger.
 */
			sync_iss_to_iks(regs);
			mp_kdp_wait(TRUE, FALSE);
		} else
#endif	/* MACH_KDP */
		if (i_bit(MP_TLB_FLUSH, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
			i_bit_clear(MP_TLB_FLUSH, my_word);
			pmap_update_interrupt();
		} else if (i_bit(MP_AST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_AST);
			i_bit_clear(MP_AST, my_word);
			ast_check(cpu_to_processor(my_cpu));
#if	MACH_KDB
		} else if (i_bit(MP_KDB, my_word)) {

			i_bit_clear(MP_KDB, my_word);
			current_cpu_datap()->cpu_kdb_is_slave++;
			mp_kdb_wait();
			current_cpu_datap()->cpu_kdb_is_slave--;
#endif	/* MACH_KDB */
		} else if (i_bit(MP_RENDEZVOUS, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
			i_bit_clear(MP_RENDEZVOUS, my_word);
			mp_rendezvous_action();
		} else if (i_bit(MP_BROADCAST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
			i_bit_clear(MP_BROADCAST, my_word);
			mp_broadcast_action();
		} else if (i_bit(MP_CHUD, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CHUD);
			i_bit_clear(MP_CHUD, my_word);
			chudxnu_cpu_signal_handler();
		} else if (i_bit(MP_CALL, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL);
			i_bit_clear(MP_CALL, my_word);
			mp_cpus_call_action();
		} else if (i_bit(MP_CALL_PM, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
			i_bit_clear(MP_CALL_PM, my_word);
			mp_call_PM();
		}
	} while (*my_word);

	mp_enable_preemption();

	return 0;
}
static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
	void	*stackptr;

	sync_iss_to_iks_unconditionally(regs);
#if defined (__i386__)
	__asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
#elif defined (__x86_64__)
	__asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
#endif

	if (cpu_number() == debugger_cpu)
		goto NMExit;

	if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) {
		char pstr[128];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number());
		panic_i386_backtrace(stackptr, 16, &pstr[0], TRUE, regs);
	}

	mp_kdp_wait(FALSE, pmap_tlb_flush_timeout);

NMExit:
	return 1;
}

#ifdef	MP_DEBUG
int	max_lock_loops = 100000000;
int	trappedalready = 0;	/* (BRINGUP) */
#endif	/* MP_DEBUG */
void
i386_cpu_IPI(int cpu)
{
	boolean_t	state;

#ifdef	MP_DEBUG
	if(cpu_datap(cpu)->cpu_signals & 6) {	/* (BRINGUP) */
		kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu);
	}
#endif	/* MP_DEBUG */

#if MACH_KDB
#ifdef	MP_DEBUG
	if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) {	/* (BRINGUP) */
		if(kdb_cpu != cpu_number()) {
			trappedalready = 1;
			panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n",
				cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu);
		}
	}
#endif	/* MP_DEBUG */
#endif	/* MACH_KDB */

	/* Wait for previous interrupt to be delivered... */
#ifdef	MP_DEBUG
	int	pending_busy_count = 0;
	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
		if (++pending_busy_count > max_lock_loops)
			panic("i386_cpu_IPI() deadlock\n");
#else
	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
#endif	/* MP_DEBUG */
		cpu_pause();
	}

	state = ml_set_interrupts_enabled(FALSE);
	LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
	LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED);
	(void) ml_set_interrupts_enabled(state);
}
/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI.  If an
 * IPI is always needed then use i386_cpu_IPI.
 */
void
cpu_interrupt(int cpu)
{
	if (smp_initialized
	    && pmCPUExitIdle(cpu_datap(cpu))) {
		i386_cpu_IPI(cpu);
	}
}
/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
void
cpu_NMI_interrupt(int cpu)
{
	boolean_t state;

	if (smp_initialized) {
		state = ml_set_interrupts_enabled(FALSE);
		/* Program the interrupt command register */
		LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
		/* The vector is ignored in this case--the target CPU will enter on the
		 * NMI vector.
		 */
		LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
		(void) ml_set_interrupts_enabled(state);
	}
}

static void	(* volatile mp_PM_func)(void) = NULL;
static void
mp_call_PM(void)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL)
		mp_PM_func();
}

void
cpu_PM_interrupt(int cpu)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL) {
		if (cpu == cpu_number())
			mp_PM_func();
		else
			i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
	}
}

void
PM_interrupt_register(void (*fn)(void))
{
	mp_PM_func = fn;
}
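
/*
 * Illustrative sketch, not part of the original file: how a power-management
 * component might hook this path.  The example_* names are hypothetical; only
 * PM_interrupt_register() and cpu_PM_interrupt() above are real entry points.
 */
#if 0	/* example only */
static void
example_PM_handler(void)
{
	/* Runs via mp_call_PM() on the target CPU with interrupts disabled. */
	kprintf("PM event on cpu %d\n", cpu_number());
}

static void
example_PM_setup(void)
{
	/* Register the callback once during PM initialization... */
	PM_interrupt_register(example_PM_handler);

	/*
	 * ...then, with interrupts disabled (cpu_PM_interrupt() asserts this),
	 * poke a CPU; remote CPUs receive an MP_CALL_PM IPI.
	 */
	cpu_PM_interrupt(1);
}
#endif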
void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	uint64_t	tsc_timeout;

	if (!cpu_datap(cpu)->cpu_running)
		return;

	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0);

	DBGLOG(cpu_signal, cpu, event);

	i_bit_set(event, signals);
	i386_cpu_IPI(cpu);
	if (mode == SYNC) {
	   again:
		tsc_timeout = rdtsc64() + (1000*1000*1000);
		while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
			cpu_pause();
		}
		if (i_bit(event, signals)) {
			DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
				cpu, event);
			goto again;
		}
	}
	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0);
}
/*
 * Send event to all running cpus.
 * Called with the topology locked.
 */
void
i386_signal_cpus(mp_event_t event, mp_sync_t mode)
{
	unsigned int	cpu;
	unsigned int	my_cpu = cpu_number();

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		i386_signal_cpu(cpu, event, mode);
	}
}

/*
 * Return the number of running cpus.
 * Called with the topology locked.
 */
int
i386_active_cpus(void)
{
	unsigned int	cpu;
	unsigned int	ncpus = 0;

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu_datap(cpu)->cpu_running)
			ncpus++;
	}
	return(ncpus);
}
/*
 * All-CPU rendezvous:
 * 	- CPUs are signalled,
 *	- all execute the setup function (if specified),
 *	- rendezvous (i.e. all cpus reach a barrier),
 *	- all execute the action function (if specified),
 *	- rendezvous again,
 *	- execute the teardown function (if specified), and then
 *	- resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */

static void
mp_rendezvous_action(void)
{
	boolean_t intrs_enabled;

	/* setup function */
	if (mp_rv_setup_func != NULL)
		mp_rv_setup_func(mp_rv_func_arg);

	intrs_enabled = ml_get_interrupts_enabled();

	/* spin on entry rendezvous */
	atomic_incl(&mp_rv_entry, 1);
	while (mp_rv_entry < mp_rv_ncpus) {
		/* poll for pesky tlb flushes if interrupts disabled */
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		cpu_pause();
	}
	/* action function */
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);
	/* spin on exit rendezvous */
	atomic_incl(&mp_rv_exit, 1);
	while (mp_rv_exit < mp_rv_ncpus) {
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		cpu_pause();
	}
	/* teardown function */
	if (mp_rv_teardown_func != NULL)
		mp_rv_teardown_func(mp_rv_func_arg);

	/* Bump completion count */
	atomic_incl(&mp_rv_complete, 1);
}
void
mp_rendezvous(void (*setup_func)(void *),
	      void (*action_func)(void *),
	      void (*teardown_func)(void *),
	      void *arg)
{

	if (!smp_initialized) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* set static function pointers */
	mp_rv_setup_func = setup_func;
	mp_rv_action_func = action_func;
	mp_rv_teardown_func = teardown_func;
	mp_rv_func_arg = arg;

	mp_rv_entry    = 0;
	mp_rv_exit     = 0;
	mp_rv_complete = 0;

	/*
	 * signal other processors, which will call mp_rendezvous_action()
	 * with interrupts disabled
	 */
	simple_lock(&x86_topo_lock);
	mp_rv_ncpus = i386_active_cpus();
	i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
	simple_unlock(&x86_topo_lock);

	/* call executor function on this cpu */
	mp_rendezvous_action();

	/*
	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
	 */
	while (mp_rv_complete < mp_rv_ncpus) {
		cpu_pause();
	}

	/* Tidy up */
	mp_rv_setup_func    = NULL;
	mp_rv_action_func   = NULL;
	mp_rv_teardown_func = NULL;
	mp_rv_func_arg      = NULL;

	/* release lock */
	simple_unlock(&mp_rv_lock);
}
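
/*
 * Illustrative sketch, not part of the original file: a caller's view of the
 * rendezvous protocol described above.  The example_* functions are
 * hypothetical; the pattern mirrors how cache/MTRR updates drive
 * mp_rendezvous() with distinct setup, action and teardown phases.
 */
#if 0	/* example only */
static void
example_setup(__unused void *arg)
{
	/* Runs on every CPU before the entry barrier. */
}

static void
example_action(void *arg)
{
	/* Runs on every CPU between the entry and exit barriers. */
	(void) arg;
}

static void
example_teardown(__unused void *arg)
{
	/* Runs on every CPU after the exit barrier. */
}

static void
example_update_all_cpus(void *new_state)
{
	mp_rendezvous(example_setup,
		      example_action,
		      example_teardown,
		      new_state);
}
#endif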
void
mp_rendezvous_break_lock(void)
{
	simple_lock_init(&mp_rv_lock, 0);
}
static void
setup_disable_intrs(__unused void * param_not_used)
{
	/* disable interrupts before the first barrier */
	boolean_t intr = ml_set_interrupts_enabled(FALSE);

	current_cpu_datap()->cpu_iflag = intr;
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

static void
teardown_restore_intrs(__unused void * param_not_used)
{
	/* restore interrupt flag following MTRR changes */
	ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
void
mp_rendezvous_no_intrs(
	      void (*action_func)(void *),
	      void *arg)
{
	mp_rendezvous(setup_disable_intrs,
		      action_func,
		      teardown_restore_intrs,
		      arg);
}
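
/*
 * Illustrative sketch, not part of the original file: how an external caller
 * (e.g. a kext) might use the exported wrapper.  The example_* names are
 * hypothetical, and wrmsr64() is assumed to be the MSR write helper from
 * <i386/proc_reg.h>.
 */
#if 0	/* example only */
typedef struct {
	uint32_t	msr;
	uint64_t	value;
} example_msr_update_t;

static void
example_write_msr(void *arg)
{
	/* Runs on every running CPU with interrupts disabled. */
	example_msr_update_t *u = (example_msr_update_t *) arg;

	wrmsr64(u->msr, u->value);
}

static void
example_write_msr_all_cpus(uint32_t msr, uint64_t value)
{
	example_msr_update_t u = { msr, value };

	mp_rendezvous_no_intrs(example_write_msr, (void *) &u);
}
#endif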
void
handle_pending_TLB_flushes(void)
{
	volatile int	*my_word = &current_cpu_datap()->cpu_signals;

	if (i_bit(MP_TLB_FLUSH, my_word)) {
		DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH);
		i_bit_clear(MP_TLB_FLUSH, my_word);
		pmap_update_interrupt();
	}
}

/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 */
static void
mp_cpus_call_action(void)
{
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);
	atomic_incl(&mp_rv_complete, 1);
}
/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * If the mode is SYNC, the function is called serially on the target cpus
 * in logical cpu order. If the mode is ASYNC, the function is called in
 * parallel over the specified cpus.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * Return does not occur until the function has completed on all cpus.
 * The return value is the number of cpus on which the function was called.
 */
cpu_t
mp_cpus_call(
	cpumask_t	cpus,
	mp_sync_t	mode,
	void		(*action_func)(void *),
	void		*arg)
{
	cpu_t		cpu;
	boolean_t	intrs_enabled = ml_get_interrupts_enabled();
	boolean_t	call_self = FALSE;

	if (!smp_initialized) {
		if ((cpus & CPUMASK_SELF) == 0)
			return 0;
		if (action_func != NULL) {
			(void) ml_set_interrupts_enabled(FALSE);
			action_func(arg);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		return 1;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* Use the rendezvous data structures for this call */
	mp_rv_action_func = action_func;
	mp_rv_func_arg = arg;
	mp_rv_ncpus = 0;
	mp_rv_complete = 0;

	simple_lock(&x86_topo_lock);
	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
		    !cpu_datap(cpu)->cpu_running)
			continue;
		if (cpu == (cpu_t) cpu_number()) {
			/*
			 * We don't IPI ourself and if calling asynchronously,
			 * we defer our call until we have signalled all others.
			 */
			call_self = TRUE;
			if (mode == SYNC && action_func != NULL) {
				(void) ml_set_interrupts_enabled(FALSE);
				action_func(arg);
				ml_set_interrupts_enabled(intrs_enabled);
			}
		} else {
			/*
			 * Bump count of other cpus called and signal this cpu.
			 * Note: we signal asynchronously regardless of mode
			 * because we wait on mp_rv_complete either here
			 * (if mode == SYNC) or later (if mode == ASYNC).
			 * While spinning, poll for TLB flushes if interrupts
			 * are disabled.
			 */
			mp_rv_ncpus++;
			i386_signal_cpu(cpu, MP_CALL, ASYNC);
			if (mode == SYNC) {
				simple_unlock(&x86_topo_lock);
				while (mp_rv_complete < mp_rv_ncpus) {
					if (!intrs_enabled)
						handle_pending_TLB_flushes();
					cpu_pause();
				}
				simple_lock(&x86_topo_lock);
			}
		}
	}
	simple_unlock(&x86_topo_lock);

	/*
	 * If calls are being made asynchronously,
	 * make the local call now if needed, and then
	 * wait for all other cpus to finish their calls.
	 */
	if (mode == ASYNC) {
		if (call_self && action_func != NULL) {
			(void) ml_set_interrupts_enabled(FALSE);
			action_func(arg);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		while (mp_rv_complete < mp_rv_ncpus) {
			if (!intrs_enabled)
				handle_pending_TLB_flushes();
			cpu_pause();
		}
	}

	/* Determine the number of cpus called */
	cpu = mp_rv_ncpus + (call_self ? 1 : 0);

	simple_unlock(&mp_rv_lock);

	return cpu;
}
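
/*
 * Illustrative sketch, not part of the original file: invoking a function on
 * a set of cpus with mp_cpus_call().  example_count_cpu() is hypothetical and
 * CPUMASK_ALL is assumed to be the all-cpus mask from <i386/mp.h>; the
 * mask/mode semantics are those documented in the block comment above.
 */
#if 0	/* example only */
static void
example_count_cpu(void *arg)
{
	/* Stand-in for per-cpu work; runs once on each selected cpu. */
	atomic_incl((volatile long *) arg, 1);
}

static void
example_touch_all_cpus(void)
{
	long	visited = 0;
	cpu_t	ncalled;

	/* Run on every running cpu, including this one, in parallel. */
	ncalled = mp_cpus_call(CPUMASK_ALL, ASYNC, example_count_cpu,
			       (void *) &visited);
	kprintf("example: function ran on %d cpus (%ld visited)\n",
		ncalled, visited);
}
#endif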
static void
mp_broadcast_action(void)
{
	/* call action function */
	if (mp_bc_action_func != NULL)
		mp_bc_action_func(mp_bc_func_arg);

	/* if we're the last one through, wake up the instigator */
	if (atomic_decl_and_test(&mp_bc_count, 1))
		thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
}
/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
void
mp_broadcast(
	 void (*action_func)(void *),
	 void *arg)
{
	if (!smp_initialized) {
		if (action_func != NULL)
			action_func(arg);
		return;
	}

	/* obtain broadcast lock */
	lck_mtx_lock(&mp_bc_lock);

	/* set static function pointers */
	mp_bc_action_func = action_func;
	mp_bc_func_arg = arg;

	assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

	/*
	 * signal other processors, which will call mp_broadcast_action()
	 */
	simple_lock(&x86_topo_lock);
	mp_bc_ncpus = i386_active_cpus();	/* total including this cpu */
	mp_bc_count = mp_bc_ncpus;
	i386_signal_cpus(MP_BROADCAST, ASYNC);

	/* call executor function on this cpu */
	mp_broadcast_action();
	simple_unlock(&x86_topo_lock);

	/* block for all cpus to have run action_func */
	if (mp_bc_ncpus > 1)
		thread_block(THREAD_CONTINUE_NULL);
	else
		clear_wait(current_thread(), THREAD_AWAKENED);

	/* release lock */
	lck_mtx_unlock(&mp_bc_lock);
}
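
/*
 * Illustrative sketch, not part of the original file: a blocking broadcast.
 * The example_* names are hypothetical.  Unlike mp_cpus_call(), mp_broadcast()
 * may block in thread_block(), so it must be called from thread context.
 */
#if 0	/* example only */
static volatile long example_ready_count;

static void
example_mark_ready(__unused void *arg)
{
	/* Runs once on every active cpu, including the caller's. */
	atomic_incl(&example_ready_count, 1);
}

static void
example_wait_until_all_ready(void)
{
	example_ready_count = 0;
	mp_broadcast(example_mark_ready, NULL);
	/* On return, every active cpu has executed example_mark_ready(). */
}
#endif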
void
i386_activate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	if (!smp_initialized) {
		cdp->cpu_running = TRUE;
		return;
	}

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = TRUE;
	started_cpu();
	simple_unlock(&x86_topo_lock);
}

extern void etimer_timer_expire(void	*arg);
void
i386_deactivate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = FALSE;
	simple_unlock(&x86_topo_lock);

	timer_queue_shutdown(&cdp->rtclock_timer.queue);
	cdp->rtclock_timer.deadline = EndOfAllTime;
	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);

	/*
	 * In case a rendezvous/broadcast/call was initiated to this cpu
	 * before we cleared cpu_running, we must perform any actions due.
	 */
	if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
		mp_rendezvous_action();
	if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
		mp_broadcast_action();
	if (i_bit(MP_CALL, &cdp->cpu_signals))
		mp_cpus_call_action();
	cdp->cpu_signals = 0;			/* all clear */
}
int	pmsafe_debug	= 1;

#if	MACH_KDP
volatile boolean_t	mp_kdp_trap = FALSE;
volatile unsigned long	mp_kdp_ncpus;
boolean_t		mp_kdp_state;
void
mp_kdp_enter(void)
{
	unsigned int	cpu;
	unsigned int	ncpus;
	unsigned int	my_cpu;
	uint64_t	tsc_timeout;

	DBG("mp_kdp_enter()\n");

	/*
	 * Here to enter the debugger.
	 * In case of races, only one cpu is allowed to enter kdp after
	 * stopping others.
	 */
	mp_kdp_state = ml_set_interrupts_enabled(FALSE);
	simple_lock(&mp_kdp_lock);
	debugger_entry_time = mach_absolute_time();

	if (pmsafe_debug)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

	while (mp_kdp_trap) {
		simple_unlock(&mp_kdp_lock);
		DBG("mp_kdp_enter() race lost\n");
		mp_kdp_wait(TRUE, FALSE);
		simple_lock(&mp_kdp_lock);
	}
	my_cpu = cpu_number();
	debugger_cpu = my_cpu;
	mp_kdp_ncpus = 1;	/* self */
	mp_kdp_trap = TRUE;
	simple_unlock(&mp_kdp_lock);

	/*
	 * Deliver a nudge to other cpus, counting how many
	 */
	DBG("mp_kdp_enter() signaling other processors\n");
	if (force_immediate_debugger_NMI == FALSE) {
		for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			ncpus++;
			i386_signal_cpu(cpu, MP_KDP, ASYNC);
		}
		/*
		 * Wait other processors to synchronize
		 */
		DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

		/*
		 * This timeout is rather arbitrary; we don't want to NMI
		 * processors that are executing at potentially
		 * "unsafe-to-interrupt" points such as the trampolines,
		 * but neither do we want to lose state by waiting too long.
		 */
		tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000);

		while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
			/*
			 * A TLB shootdown request may be pending--this would
			 * result in the requesting processor waiting in
			 * PMAP_UPDATE_TLBS() until this processor deals with it.
			 * Process it, so it can now enter mp_kdp_wait()
			 */
			handle_pending_TLB_flushes();
			cpu_pause();
		}
		/* If we've timed out, and some processor(s) are still unresponsive,
		 * interrupt them with an NMI via the local APIC.
		 */
		if (mp_kdp_ncpus != ncpus) {
			for (cpu = 0; cpu < real_ncpus; cpu++) {
				if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
					continue;
				if (cpu_signal_pending(cpu, MP_KDP))
					cpu_NMI_interrupt(cpu);
			}
		}
	}
	else
		for (cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			cpu_NMI_interrupt(cpu);
		}

	DBG("mp_kdp_enter() %u processors done %s\n",
	    mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");

	postcode(MP_KDP_ENTER);
}
static boolean_t
cpu_signal_pending(int cpu, mp_event_t event)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	boolean_t retval = FALSE;

	if (i_bit(event, signals))
		retval = TRUE;
	return retval;
}
long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
			 void *arg0, void *arg1)
{
	if (lcpu > (real_ncpus - 1))
		return -1;

	if (func == NULL)
		return -1;

	kdp_xcpu_call_func.func = func;
	kdp_xcpu_call_func.ret  = -1;
	kdp_xcpu_call_func.arg0 = arg0;
	kdp_xcpu_call_func.arg1 = arg1;
	kdp_xcpu_call_func.cpu  = lcpu;
	DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
	while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
		cpu_pause();
	return kdp_xcpu_call_func.ret;
}
static void
kdp_x86_xcpu_poll(void)
{
	if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
		kdp_xcpu_call_func.ret =
			kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
						kdp_xcpu_call_func.arg1,
						cpu_number());
		kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
	}
}
static void
mp_kdp_wait(boolean_t flush, boolean_t isNMI)
{
	DBG("mp_kdp_wait()\n");
	/* If an I/O port has been specified as a debugging aid, issue a read */
	panic_io_port_read();

#if CONFIG_MCA
	/* If we've trapped due to a machine-check, save MCA registers */
	mca_check_save();
#endif

	if (pmsafe_debug)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

	atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
	while (mp_kdp_trap || (isNMI == TRUE)) {
		/*
		 * A TLB shootdown request may be pending--this would result
		 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
		 * until this processor handles it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		if (flush)
			handle_pending_TLB_flushes();

		kdp_x86_xcpu_poll();
		cpu_pause();
	}

	if (pmsafe_debug)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);

	atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
	DBG("mp_kdp_wait() done\n");
}
1310 DBG("mp_kdp_exit()\n");
1312 atomic_decl((volatile long *)&mp_kdp_ncpus
, 1);
1314 debugger_exit_time
= mach_absolute_time();
1316 mp_kdp_trap
= FALSE
;
1317 __asm__
volatile("mfence");
1319 /* Wait other processors to stop spinning. XXX needs timeout */
1320 DBG("mp_kdp_exit() waiting for processors to resume\n");
1321 while (mp_kdp_ncpus
> 0) {
1323 * a TLB shootdown request may be pending... this would result in the requesting
1324 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1325 * Process it, so it can now enter mp_kdp_wait()
1327 handle_pending_TLB_flushes();
1333 pmSafeMode(¤t_cpu_datap()->lcpu
, PM_SAFE_FL_NORMAL
);
1335 DBG("mp_kdp_exit() done\n");
1336 (void) ml_set_interrupts_enabled(mp_kdp_state
);
1339 #endif /* MACH_KDP */
boolean_t
mp_recent_debugger_activity() {
	return (((mach_absolute_time() - debugger_entry_time) < LastDebuggerEntryAllowance) ||
	    ((mach_absolute_time() - debugger_exit_time) < LastDebuggerEntryAllowance));
}
void
init_ast_check(
	__unused processor_t	processor)
{
}

void
cause_ast_check(
	processor_t	processor)
{
	int	cpu = processor->cpu_id;

	if (cpu != cpu_number()) {
		i386_signal_cpu(cpu, MP_AST, ASYNC);
	}
}
#if	MACH_KDB
/*
 * invoke kdb on slave processors
 */
void
remote_kdb(void)
{
	unsigned int	my_cpu = cpu_number();
	unsigned int	cpu;
	int		kdb_ncpus;
	uint64_t	tsc_timeout = 0;

	mp_kdb_trap = TRUE;
	mp_kdb_ncpus = 1;
	for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		kdb_ncpus++;
		i386_signal_cpu(cpu, MP_KDB, ASYNC);
	}
	DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus);

	tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000);

	while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) {
		/*
		 * A TLB shootdown request may be pending... this would result in the requesting
		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		handle_pending_TLB_flushes();

		cpu_pause();
	}
	DBG("mp_kdp_enter() %d processors done %s\n",
		mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out");
}
1407 DBG("mp_kdb_wait()\n");
1409 /* If an I/O port has been specified as a debugging aid, issue a read */
1410 panic_io_port_read();
1412 atomic_incl(&mp_kdb_ncpus
, 1);
1413 while (mp_kdb_trap
) {
1415 * a TLB shootdown request may be pending... this would result in the requesting
1416 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1417 * Process it, so it can now enter mp_kdp_wait()
1419 handle_pending_TLB_flushes();
1423 atomic_decl((volatile long *)&mp_kdb_ncpus
, 1);
1424 DBG("mp_kdb_wait() done\n");
/*
 * Clear kdb interrupt
 */
void
clear_kdb_intr(void)
{
	mp_disable_preemption();
	i_bit_clear(MP_KDB, &current_cpu_datap()->cpu_signals);
	mp_enable_preemption();
}

void
mp_kdb_exit(void)
{
	DBG("mp_kdb_exit()\n");
	atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
	mp_kdb_trap = FALSE;
	__asm__ volatile("mfence");

	while (mp_kdb_ncpus > 0) {
		/*
		 * A TLB shootdown request may be pending... this would result in the requesting
		 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		handle_pending_TLB_flushes();

		cpu_pause();
	}

	DBG("mp_kdb_exit() done\n");
}

#endif	/* MACH_KDB */
void
slave_machine_init(void *param)
{
	/*
	 * Here in process context, but with interrupts disabled.
	 */
	DBG("slave_machine_init() CPU%d\n", get_cpu_number());

	if (param == FULL_SLAVE_INIT) {
		cpu_machine_init();	/* Interrupts enabled hereafter */
	}
}

int cpu_number(void)
{
	return get_cpu_number();
}
#if	MACH_KDB
#include <ddb/db_output.h>

#define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */

#if	TRAP_DEBUG
#define MTRAPS 100
struct mp_trap_hist_struct {
	unsigned char type;
	unsigned char data[5];
} trap_hist[MTRAPS], *cur_trap_hist = trap_hist,
    *max_trap_hist = &trap_hist[MTRAPS];

void db_trap_hist(void);

void
db_trap_hist(void)
{
	int i, j;

	for(i=0;i<MTRAPS;i++)
		if (trap_hist[i].type == 1 || trap_hist[i].type == 2) {
			db_printf("%s%s",
				  (&trap_hist[i]>=cur_trap_hist)?"*":" ",
				  (trap_hist[i].type == 1)?"SPL":"INT");
			for(j=0;j<5;j++)
				db_printf(" %02x", trap_hist[i].data[j]);
			db_printf("\n");
		}
}
#endif	/* TRAP_DEBUG */
#endif	/* MACH_KDB */