1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31
32 #include <mach_rt.h>
33 #include <mach_kdb.h>
34 #include <mach_kdp.h>
35 #include <mach_ldebug.h>
36 #include <gprof.h>
37
38 #include <mach/mach_types.h>
39 #include <mach/kern_return.h>
40
41 #include <kern/kern_types.h>
42 #include <kern/startup.h>
43 #include <kern/processor.h>
44 #include <kern/cpu_number.h>
45 #include <kern/cpu_data.h>
46 #include <kern/assert.h>
47 #include <kern/machine.h>
48 #include <kern/pms.h>
49
50 #include <vm/vm_map.h>
51 #include <vm/vm_kern.h>
52
53 #include <profiling/profile-mk.h>
54
55 #include <i386/mp.h>
56 #include <i386/mp_events.h>
57 #include <i386/mp_slave_boot.h>
58 #include <i386/apic.h>
59 #include <i386/ipl.h>
60 #include <i386/fpu.h>
61 #include <i386/cpuid.h>
62 #include <i386/proc_reg.h>
63 #include <i386/machine_cpu.h>
64 #include <i386/misc_protos.h>
65 #include <i386/mtrr.h>
66 #include <i386/vmx/vmx_cpu.h>
67 #include <i386/postcode.h>
68 #include <i386/perfmon.h>
69 #include <i386/cpu_threads.h>
70 #include <i386/mp_desc.h>
71 #include <i386/trap.h>
72 #include <i386/machine_routines.h>
73 #include <i386/pmCPU.h>
74 #include <i386/hpet.h>
75 #include <i386/machine_check.h>
76
77 #include <chud/chud_xnu.h>
78 #include <chud/chud_xnu_private.h>
79
80 #include <sys/kdebug.h>
81 #if MACH_KDB
82 #include <i386/db_machdep.h>
83 #include <ddb/db_aout.h>
84 #include <ddb/db_access.h>
85 #include <ddb/db_sym.h>
86 #include <ddb/db_variables.h>
87 #include <ddb/db_command.h>
88 #include <ddb/db_output.h>
89 #include <ddb/db_expr.h>
90 #endif
91
92 #if MP_DEBUG
93 #define PAUSE delay(1000000)
94 #define DBG(x...) kprintf(x)
95 #else
96 #define DBG(x...)
97 #define PAUSE
98 #endif /* MP_DEBUG */
99
100 /* Initialize lapic_id so cpu_number() works on non-SMP systems */
101 unsigned long lapic_id_initdata = 0;
102 unsigned long lapic_id = (unsigned long)&lapic_id_initdata;
103 vm_offset_t lapic_start;
104
105 static i386_intr_func_t lapic_timer_func;
106 static i386_intr_func_t lapic_pmi_func;
107 static i386_intr_func_t lapic_thermal_func;
108
109 /* TRUE if local APIC was enabled by the OS not by the BIOS */
110 static boolean_t lapic_os_enabled = FALSE;
111
112 /* Base vector for local APIC interrupt sources */
113 int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
114
115 void slave_boot_init(void);
116
117 #if MACH_KDB
118 static void mp_kdb_wait(void);
119 volatile boolean_t mp_kdb_trap = FALSE;
120 volatile long mp_kdb_ncpus = 0;
121 #endif
122
123 static void mp_kdp_wait(boolean_t flush);
124 static void mp_rendezvous_action(void);
125 static void mp_broadcast_action(void);
126
127 static int NMIInterruptHandler(x86_saved_state_t *regs);
128 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
129 static void cpu_NMI_interrupt(int cpu);
130
131 boolean_t smp_initialized = FALSE;
132 boolean_t force_immediate_debugger_NMI = FALSE;
133
134 decl_simple_lock_data(,mp_kdp_lock);
135
136 decl_mutex_data(static, mp_cpu_boot_lock);
137
138 /* Variables needed for MP rendezvous. */
139 decl_simple_lock_data(,mp_rv_lock);
140 static void (*mp_rv_setup_func)(void *arg);
141 static void (*mp_rv_action_func)(void *arg);
142 static void (*mp_rv_teardown_func)(void *arg);
143 static void *mp_rv_func_arg;
144 static int mp_rv_ncpus;
145 /* Cache-aligned barriers: */
146 static volatile long mp_rv_entry __attribute__((aligned(64)));
147 static volatile long mp_rv_exit __attribute__((aligned(64)));
148 static volatile long mp_rv_complete __attribute__((aligned(64)));
149
150 /* Variables needed for MP broadcast. */
151 static void (*mp_bc_action_func)(void *arg);
152 static void *mp_bc_func_arg;
153 static int mp_bc_ncpus;
154 static volatile long mp_bc_count;
155 decl_mutex_data(static, mp_bc_lock);
156
157 static void mp_cpus_call_action(void);
158
159 int lapic_to_cpu[MAX_CPUS];
160 int cpu_to_lapic[MAX_CPUS];
161
162 static void
163 lapic_cpu_map_init(void)
164 {
165 int i;
166
167 for (i = 0; i < MAX_CPUS; i++) {
168 lapic_to_cpu[i] = -1;
169 cpu_to_lapic[i] = -1;
170 }
171 }
172
173 void
174 lapic_cpu_map(int apic_id, int cpu)
175 {
176 cpu_to_lapic[cpu] = apic_id;
177 lapic_to_cpu[apic_id] = cpu;
178 }
179
180 /*
181 * Retrieve the local APIC ID of a cpu.
182 *
183 * Returns the local APIC ID for the given processor.
184 * If the processor does not exist or the APIC is not configured, returns -1.
185 */
186
187 uint32_t
188 ml_get_apicid(uint32_t cpu)
189 {
190 if(cpu >= (uint32_t)MAX_CPUS)
191 return 0xFFFFFFFF; /* Return -1 if cpu too big */
192
193 /* Return the apic ID (or -1 if not configured) */
194 return (uint32_t)cpu_to_lapic[cpu];
195
196 }
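
/*
 * Illustrative sketch, not part of the original source: look up this
 * cpu's local APIC id through the map populated by lapic_cpu_map().
 * The example_* name is hypothetical.
 */
#if 0	/* example only */
static void
example_log_apicid(void)
{
	uint32_t apicid = ml_get_apicid((uint32_t)cpu_number());

	if (apicid != 0xFFFFFFFF)
		kprintf("cpu %d has local APIC id 0x%x\n",
			cpu_number(), apicid);
}
#endif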
197
198 #ifdef MP_DEBUG
199 static void
200 lapic_cpu_map_dump(void)
201 {
202 int i;
203
204 for (i = 0; i < MAX_CPUS; i++) {
205 if (cpu_to_lapic[i] == -1)
206 continue;
207 kprintf("cpu_to_lapic[%d]: %d\n",
208 i, cpu_to_lapic[i]);
209 }
210 for (i = 0; i < MAX_CPUS; i++) {
211 if (lapic_to_cpu[i] == -1)
212 continue;
213 kprintf("lapic_to_cpu[%d]: %d\n",
214 i, lapic_to_cpu[i]);
215 }
216 }
217 #define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump()
218 #define LAPIC_DUMP() lapic_dump()
219 #else
220 #define LAPIC_CPU_MAP_DUMP()
221 #define LAPIC_DUMP()
222 #endif /* MP_DEBUG */
223
224 #if GPROF
225 /*
226 * Initialize dummy structs for profiling. These aren't used but
227 * allow hertz_tick() to be built with GPROF defined.
228 */
229 struct profile_vars _profile_vars;
230 struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
231 #define GPROF_INIT() \
232 { \
233 int i; \
234 \
235 /* Hack to initialize pointers to unused profiling structs */ \
236 for (i = 1; i < MAX_CPUS; i++) \
237 _profile_vars_cpus[i] = &_profile_vars; \
238 }
239 #else
240 #define GPROF_INIT()
241 #endif /* GPROF */
242
243 void
244 smp_init(void)
245 {
246 int result;
247 vm_map_entry_t entry;
248 uint32_t lo;
249 uint32_t hi;
250 boolean_t is_boot_processor;
251 boolean_t is_lapic_enabled;
252 vm_offset_t lapic_base;
253
254 simple_lock_init(&mp_kdp_lock, 0);
255 simple_lock_init(&mp_rv_lock, 0);
256 mutex_init(&mp_cpu_boot_lock, 0);
257 mutex_init(&mp_bc_lock, 0);
258 console_init();
259
260 /* Local APIC? */
261 if (!lapic_probe())
262 return;
263
264 /* Examine the local APIC state */
265 rdmsr(MSR_IA32_APIC_BASE, lo, hi);
266 is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
267 is_lapic_enabled = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
268 lapic_base = (lo & MSR_IA32_APIC_BASE_BASE);
269 kprintf("MSR_IA32_APIC_BASE 0x%x %s %s\n", lapic_base,
270 is_lapic_enabled ? "enabled" : "disabled",
271 is_boot_processor ? "BSP" : "AP");
272 if (!is_boot_processor || !is_lapic_enabled)
273 panic("Unexpected local APIC state\n");
274
275 /* Establish a map to the local apic */
276 lapic_start = vm_map_min(kernel_map);
277 result = vm_map_find_space(kernel_map,
278 (vm_map_address_t *) &lapic_start,
279 round_page(LAPIC_SIZE), 0,
280 VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
281 if (result != KERN_SUCCESS) {
282 panic("smp_init: vm_map_find_space FAILED (err=%d)", result);
283 }
284 vm_map_unlock(kernel_map);
285 /* Map in the local APIC non-cacheable, as recommended by Intel
286 * in section 8.4.1 of the "System Programming Guide".
287 */
288 pmap_enter(pmap_kernel(),
289 lapic_start,
290 (ppnum_t) i386_btop(lapic_base),
291 VM_PROT_READ|VM_PROT_WRITE,
292 VM_WIMG_IO,
293 TRUE);
294 lapic_id = (unsigned long)(lapic_start + LAPIC_ID);
295
296 if ((LAPIC_REG(VERSION)&LAPIC_VERSION_MASK) != 0x14) {
297 printf("Local APIC version not 0x14 as expected\n");
298 }
299
300 /* Set up the lapic_id <-> cpu_number map and add this boot processor */
301 lapic_cpu_map_init();
302 lapic_cpu_map((LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0);
303 kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]);
304
305 lapic_init();
306
307 cpu_thread_init();
308
309 GPROF_INIT();
310 DBGLOG_CPU_INIT(master_cpu);
311
312 slave_boot_init();
313
314 smp_initialized = TRUE;
315
316 return;
317 }
318
319
320 static int
321 lapic_esr_read(void)
322 {
323 /* write-read register */
324 LAPIC_REG(ERROR_STATUS) = 0;
325 return LAPIC_REG(ERROR_STATUS);
326 }
327
328 static void
329 lapic_esr_clear(void)
330 {
331 LAPIC_REG(ERROR_STATUS) = 0;
332 LAPIC_REG(ERROR_STATUS) = 0;
333 }
334
335 static const char *DM[8] = {
336 "Fixed",
337 "Lowest Priority",
338 "Invalid",
339 "Invalid",
340 "NMI",
341 "Reset",
342 "Invalid",
343 "ExtINT"};
344
345 void
346 lapic_dump(void)
347 {
348 int i;
349
350 #define BOOL(a) ((a)?' ':'!')
351
352 kprintf("LAPIC %d at 0x%x version 0x%x\n",
353 (LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
354 lapic_start,
355 LAPIC_REG(VERSION)&LAPIC_VERSION_MASK);
356 kprintf("Priorities: Task 0x%x Arbitration 0x%x Processor 0x%x\n",
357 LAPIC_REG(TPR)&LAPIC_TPR_MASK,
358 LAPIC_REG(APR)&LAPIC_APR_MASK,
359 LAPIC_REG(PPR)&LAPIC_PPR_MASK);
360 kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
361 LAPIC_REG(DFR)>>LAPIC_DFR_SHIFT,
362 LAPIC_REG(LDR)>>LAPIC_LDR_SHIFT);
363 kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
364 BOOL(LAPIC_REG(SVR)&LAPIC_SVR_ENABLE),
365 BOOL(!(LAPIC_REG(SVR)&LAPIC_SVR_FOCUS_OFF)),
366 LAPIC_REG(SVR) & LAPIC_SVR_MASK);
367 kprintf("LVT_TIMER: Vector 0x%02x %s %cmasked %s\n",
368 LAPIC_REG(LVT_TIMER)&LAPIC_LVT_VECTOR_MASK,
369 (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
370 BOOL(LAPIC_REG(LVT_TIMER)&LAPIC_LVT_MASKED),
371 (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot");
372 kprintf(" Initial Count: 0x%08x \n", LAPIC_REG(TIMER_INITIAL_COUNT));
373 kprintf(" Current Count: 0x%08x \n", LAPIC_REG(TIMER_CURRENT_COUNT));
374 kprintf(" Divide Config: 0x%08x \n", LAPIC_REG(TIMER_DIVIDE_CONFIG));
375 kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n",
376 LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_VECTOR_MASK,
377 DM[(LAPIC_REG(LVT_PERFCNT)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
378 (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
379 BOOL(LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_MASKED));
380 kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n",
381 LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_VECTOR_MASK,
382 DM[(LAPIC_REG(LVT_THERMAL)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
383 (LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
384 BOOL(LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_MASKED));
385 kprintf("LVT_LINT0: Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
386 LAPIC_REG(LVT_LINT0)&LAPIC_LVT_VECTOR_MASK,
387 DM[(LAPIC_REG(LVT_LINT0)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
388 (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_TM_LEVEL)?"Level":"Edge ",
389 (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_IP_PLRITY_LOW)?"Low ":"High",
390 (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
391 BOOL(LAPIC_REG(LVT_LINT0)&LAPIC_LVT_MASKED));
392 kprintf("LVT_LINT1: Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
393 LAPIC_REG(LVT_LINT1)&LAPIC_LVT_VECTOR_MASK,
394 DM[(LAPIC_REG(LVT_LINT1)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
395 (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_TM_LEVEL)?"Level":"Edge ",
396 (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_IP_PLRITY_LOW)?"Low ":"High",
397 (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
398 BOOL(LAPIC_REG(LVT_LINT1)&LAPIC_LVT_MASKED));
399 kprintf("LVT_ERROR: Vector 0x%02x %s %cmasked\n",
400 LAPIC_REG(LVT_ERROR)&LAPIC_LVT_VECTOR_MASK,
401 (LAPIC_REG(LVT_ERROR)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
402 BOOL(LAPIC_REG(LVT_ERROR)&LAPIC_LVT_MASKED));
403 kprintf("ESR: %08x \n", lapic_esr_read());
404 kprintf(" ");
405 for(i=0xf; i>=0; i--)
406 kprintf("%x%x%x%x",i,i,i,i);
407 kprintf("\n");
408 kprintf("TMR: 0x");
409 for(i=7; i>=0; i--)
410 kprintf("%08x",LAPIC_REG_OFFSET(TMR_BASE, i*0x10));
411 kprintf("\n");
412 kprintf("IRR: 0x");
413 for(i=7; i>=0; i--)
414 kprintf("%08x",LAPIC_REG_OFFSET(IRR_BASE, i*0x10));
415 kprintf("\n");
416 kprintf("ISR: 0x");
417 for(i=7; i >= 0; i--)
418 kprintf("%08x",LAPIC_REG_OFFSET(ISR_BASE, i*0x10));
419 kprintf("\n");
420 }
421
422 #if MACH_KDB
423 /*
424 * Displays apic junk
425 *
426 * da
427 */
428 void
429 db_apic(__unused db_expr_t addr,
430 __unused int have_addr,
431 __unused db_expr_t count,
432 __unused char *modif)
433 {
434
435 lapic_dump();
436
437 return;
438 }
439
440 #endif
441
442 boolean_t
443 lapic_probe(void)
444 {
445 uint32_t lo;
446 uint32_t hi;
447
448 if (cpuid_features() & CPUID_FEATURE_APIC)
449 return TRUE;
450
451 if (cpuid_family() == 6 || cpuid_family() == 15) {
452 /*
453 * Mobile Pentiums:
454 * There may be a local APIC which wasn't enabled by BIOS.
455 * So we try to enable it explicitly.
456 */
457 rdmsr(MSR_IA32_APIC_BASE, lo, hi);
458 lo &= ~MSR_IA32_APIC_BASE_BASE;
459 lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START;
460 lo |= MSR_IA32_APIC_BASE_ENABLE;
461 wrmsr(MSR_IA32_APIC_BASE, lo, hi);
462
463 /*
464 * Re-initialize cpu features info and re-check.
465 */
466 cpuid_set_info();
467 if (cpuid_features() & CPUID_FEATURE_APIC) {
468 printf("Local APIC discovered and enabled\n");
469 lapic_os_enabled = TRUE;
470 lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE;
471 return TRUE;
472 }
473 }
474
475 return FALSE;
476 }
477
478 void
479 lapic_shutdown(void)
480 {
481 uint32_t lo;
482 uint32_t hi;
483 uint32_t value;
484
485 /* Shutdown if local APIC was enabled by OS */
486 if (lapic_os_enabled == FALSE)
487 return;
488
489 mp_disable_preemption();
490
491 /* ExtINT: masked */
492 if (get_cpu_number() == master_cpu) {
493 value = LAPIC_REG(LVT_LINT0);
494 value |= LAPIC_LVT_MASKED;
495 LAPIC_REG(LVT_LINT0) = value;
496 }
497
498 /* Timer: masked */
499 LAPIC_REG(LVT_TIMER) |= LAPIC_LVT_MASKED;
500
501 /* Perfmon: masked */
502 LAPIC_REG(LVT_PERFCNT) |= LAPIC_LVT_MASKED;
503
504 /* Error: masked */
505 LAPIC_REG(LVT_ERROR) |= LAPIC_LVT_MASKED;
506
507 /* APIC software disabled */
508 LAPIC_REG(SVR) &= ~LAPIC_SVR_ENABLE;
509
510 /* Bypass the APIC completely and update cpu features */
511 rdmsr(MSR_IA32_APIC_BASE, lo, hi);
512 lo &= ~MSR_IA32_APIC_BASE_ENABLE;
513 wrmsr(MSR_IA32_APIC_BASE, lo, hi);
514 cpuid_set_info();
515
516 mp_enable_preemption();
517 }
518
519 void
520 lapic_init(void)
521 {
522 int value;
523
524 /* Set flat delivery model, logical processor id */
525 LAPIC_REG(DFR) = LAPIC_DFR_FLAT;
526 LAPIC_REG(LDR) = (get_cpu_number()) << LAPIC_LDR_SHIFT;
527
528 /* Accept all */
529 LAPIC_REG(TPR) = 0;
530
531 LAPIC_REG(SVR) = LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE;
532
533 /* ExtINT */
534 if (get_cpu_number() == master_cpu) {
535 value = LAPIC_REG(LVT_LINT0);
536 value &= ~LAPIC_LVT_MASKED;
537 value |= LAPIC_LVT_DM_EXTINT;
538 LAPIC_REG(LVT_LINT0) = value;
539 }
540
541 /* Timer: unmasked, one-shot */
542 LAPIC_REG(LVT_TIMER) = LAPIC_VECTOR(TIMER);
543
544 /* Perfmon: unmasked */
545 LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT);
546
547 /* Thermal: unmasked */
548 LAPIC_REG(LVT_THERMAL) = LAPIC_VECTOR(THERMAL);
549
550 lapic_esr_clear();
551
552 LAPIC_REG(LVT_ERROR) = LAPIC_VECTOR(ERROR);
553 }
554
555 void
556 lapic_set_timer_func(i386_intr_func_t func)
557 {
558 lapic_timer_func = func;
559 }
560
561 void
562 lapic_set_timer(
563 boolean_t interrupt,
564 lapic_timer_mode_t mode,
565 lapic_timer_divide_t divisor,
566 lapic_timer_count_t initial_count)
567 {
568 boolean_t state;
569 uint32_t timer_vector;
570
571 state = ml_set_interrupts_enabled(FALSE);
572 timer_vector = LAPIC_REG(LVT_TIMER);
573 timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);
574 timer_vector |= interrupt ? 0 : LAPIC_LVT_MASKED;
575 timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
576 LAPIC_REG(LVT_TIMER) = timer_vector;
577 LAPIC_REG(TIMER_DIVIDE_CONFIG) = divisor;
578 LAPIC_REG(TIMER_INITIAL_COUNT) = initial_count;
579 ml_set_interrupts_enabled(state);
580 }
581
582 void
583 lapic_get_timer(
584 lapic_timer_mode_t *mode,
585 lapic_timer_divide_t *divisor,
586 lapic_timer_count_t *initial_count,
587 lapic_timer_count_t *current_count)
588 {
589 boolean_t state;
590
591 state = ml_set_interrupts_enabled(FALSE);
592 if (mode)
593 *mode = (LAPIC_REG(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
594 periodic : one_shot;
595 if (divisor)
596 *divisor = LAPIC_REG(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK;
597 if (initial_count)
598 *initial_count = LAPIC_REG(TIMER_INITIAL_COUNT);
599 if (current_count)
600 *current_count = LAPIC_REG(TIMER_CURRENT_COUNT);
601 ml_set_interrupts_enabled(state);
602 }
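
/*
 * Illustrative sketch, not part of the original source: re-arm the local
 * APIC timer as a one-shot countdown using the accessors above. The
 * count value and the example_* name are arbitrary/hypothetical.
 */
#if 0	/* example only */
static void
example_rearm_lapic_timer(void)
{
	lapic_timer_mode_t	mode;
	lapic_timer_divide_t	divisor;
	lapic_timer_count_t	initial;
	lapic_timer_count_t	current;

	/* Read back the current configuration... */
	lapic_get_timer(&mode, &divisor, &initial, &current);
	/* ...and start a one-shot countdown with the same divisor,
	 * delivering an interrupt (handled by lapic_timer_func). */
	lapic_set_timer(TRUE, one_shot, divisor, 100000);
}
#endif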
603
604 void
605 lapic_set_pmi_func(i386_intr_func_t func)
606 {
607 lapic_pmi_func = func;
608 }
609
610 void
611 lapic_set_thermal_func(i386_intr_func_t func)
612 {
613 lapic_thermal_func = func;
614 }
615
616 static inline void
617 _lapic_end_of_interrupt(void)
618 {
619 LAPIC_REG(EOI) = 0;
620 }
621
622 void
623 lapic_end_of_interrupt(void)
624 {
625 _lapic_end_of_interrupt();
626 }
627
628 int
629 lapic_interrupt(int interrupt, x86_saved_state_t *state)
630 {
631 int retval = 0;
632
633 /* Did we just field an interruption for the HPET comparator? */
634 if(x86_core()->HpetVec == ((uint32_t)interrupt - 0x40)) {
635 /* Yes, go handle it... */
636 retval = HPETInterrupt();
637 /* Was it really handled? */
638 if(retval) {
639 /* If so, EOI the 'rupt */
640 _lapic_end_of_interrupt();
641 /*
642 * and then leave,
643 * indicating that this has been handled
644 */
645 return 1;
646 }
647 }
648
649 interrupt -= lapic_interrupt_base;
650 if (interrupt < 0) {
651 if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base)) {
652 retval = NMIInterruptHandler(state);
653 _lapic_end_of_interrupt();
654 return retval;
655 }
656 else
657 return 0;
658 }
659
660 switch(interrupt) {
661 case LAPIC_PERFCNT_INTERRUPT:
662 if (lapic_pmi_func != NULL)
663 (*lapic_pmi_func)(NULL);
664 /* Clear interrupt masked */
665 LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT);
666 _lapic_end_of_interrupt();
667 retval = 1;
668 break;
669 case LAPIC_TIMER_INTERRUPT:
670 _lapic_end_of_interrupt();
671 if (lapic_timer_func != NULL)
672 (*lapic_timer_func)(state);
673 retval = 1;
674 break;
675 case LAPIC_THERMAL_INTERRUPT:
676 if (lapic_thermal_func != NULL)
677 (*lapic_thermal_func)(NULL);
678 _lapic_end_of_interrupt();
679 retval = 1;
680 break;
681 case LAPIC_ERROR_INTERRUPT:
682 lapic_dump();
683 panic("Local APIC error\n");
684 _lapic_end_of_interrupt();
685 retval = 1;
686 break;
687 case LAPIC_SPURIOUS_INTERRUPT:
688 kprintf("SPIV\n");
689 /* No EOI required here */
690 retval = 1;
691 break;
692 case LAPIC_INTERPROCESSOR_INTERRUPT:
693 _lapic_end_of_interrupt();
694 cpu_signal_handler(state);
695 retval = 1;
696 break;
697 }
698
699 return retval;
700 }
701
702 void
703 lapic_smm_restore(void)
704 {
705 boolean_t state;
706
707 if (lapic_os_enabled == FALSE)
708 return;
709
710 state = ml_set_interrupts_enabled(FALSE);
711
712 if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) {
713 /*
714 * Bogus SMI handler enables interrupts but does not know about
715 * local APIC interrupt sources. When APIC timer counts down to
716 * zero while in SMM, local APIC will end up waiting for an EOI
717 * but no interrupt was delivered to the OS.
718 */
719 _lapic_end_of_interrupt();
720
721 /*
722 * The timer is one-shot; trigger another quick countdown to generate
723 * another timer interrupt.
724 */
725 if (LAPIC_REG(TIMER_CURRENT_COUNT) == 0) {
726 LAPIC_REG(TIMER_INITIAL_COUNT) = 1;
727 }
728
729 kprintf("lapic_smm_restore\n");
730 }
731
732 ml_set_interrupts_enabled(state);
733 }
734
735 kern_return_t
736 intel_startCPU(
737 int slot_num)
738 {
739
740 int i = 1000;
741 int lapic = cpu_to_lapic[slot_num];
742
743 assert(lapic != -1);
744
745 DBGLOG_CPU_INIT(slot_num);
746
747 DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
748 DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD);
749
750 /*
751 * Initialize (or re-initialize) the descriptor tables for this cpu.
752 * Propagate processor mode to slave.
753 */
754 if (cpu_mode_is64bit())
755 cpu_desc_init64(cpu_datap(slot_num), FALSE);
756 else
757 cpu_desc_init(cpu_datap(slot_num), FALSE);
758
759 /* Serialize use of the slave boot stack. */
760 mutex_lock(&mp_cpu_boot_lock);
761
762 mp_disable_preemption();
763 if (slot_num == get_cpu_number()) {
764 mp_enable_preemption();
765 mutex_unlock(&mp_cpu_boot_lock);
766 return KERN_SUCCESS;
767 }
768
769 LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
770 LAPIC_REG(ICR) = LAPIC_ICR_DM_INIT;
771 delay(10000);
772
773 LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
774 LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12);
775 delay(200);
776
777 LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
778 LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12);
779 delay(200);
780
781 #ifdef POSTCODE_DELAY
782 /* Wait much longer if postcodes are displayed for a delay period. */
783 i *= 10000;
784 #endif
785 while(i-- > 0) {
786 if (cpu_datap(slot_num)->cpu_running)
787 break;
788 delay(10000);
789 }
790
791 mp_enable_preemption();
792 mutex_unlock(&mp_cpu_boot_lock);
793
794 if (!cpu_datap(slot_num)->cpu_running) {
795 kprintf("Failed to start CPU %02d\n", slot_num);
796 printf("Failed to start CPU %02d, rebooting...\n", slot_num);
797 delay(1000000);
798 cpu_shutdown();
799 return KERN_SUCCESS;
800 } else {
801 kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
802 return KERN_SUCCESS;
803 }
804 }
805
806 extern char slave_boot_base[];
807 extern char slave_boot_end[];
808 extern void slave_pstart(void);
809
810 void
811 slave_boot_init(void)
812 {
813 DBG("V(slave_boot_base)=%p P(slave_boot_base)=%p MP_BOOT=%p sz=0x%x\n",
814 slave_boot_base,
815 kvtophys((vm_offset_t) slave_boot_base),
816 MP_BOOT,
817 slave_boot_end-slave_boot_base);
818
819 /*
820 * Copy the boot entry code to the real-mode vector area MP_BOOT.
821 * This is in page 1 which has been reserved for this purpose by
822 * machine_startup() from the boot processor.
823 * The slave boot code is responsible for switching to protected
824 * mode and then jumping to the common startup, _start().
825 */
826 bcopy_phys(kvtophys((vm_offset_t) slave_boot_base),
827 (addr64_t) MP_BOOT,
828 slave_boot_end-slave_boot_base);
829
830 /*
831 * Zero a stack area above the boot code.
832 */
833 DBG("bzero_phys 0x%x sz 0x%x\n",MP_BOOTSTACK+MP_BOOT-0x400, 0x400);
834 bzero_phys((addr64_t)MP_BOOTSTACK+MP_BOOT-0x400, 0x400);
835
836 /*
837 * Set the location at the base of the stack to point to the
838 * common startup entry.
839 */
840 DBG("writing 0x%x at phys 0x%x\n",
841 kvtophys((vm_offset_t) &slave_pstart), MP_MACH_START+MP_BOOT);
842 ml_phys_write_word(MP_MACH_START+MP_BOOT,
843 (unsigned int)kvtophys((vm_offset_t) &slave_pstart));
844
845 /* Flush caches */
846 __asm__("wbinvd");
847 }
848
849 #if MP_DEBUG
850 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
851 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
852
853 MP_EVENT_NAME_DECL();
854
855 #endif /* MP_DEBUG */
856
857 void
858 cpu_signal_handler(x86_saved_state_t *regs)
859 {
860 int my_cpu;
861 volatile int *my_word;
862 #if MACH_KDB && MACH_ASSERT
863 int i=100;
864 #endif /* MACH_KDB && MACH_ASSERT */
865
866 mp_disable_preemption();
867
868 my_cpu = cpu_number();
869 my_word = &current_cpu_datap()->cpu_signals;
870
871 do {
872 #if MACH_KDB && MACH_ASSERT
873 if (i-- <= 0)
874 Debugger("cpu_signal_handler: signals did not clear");
875 #endif /* MACH_KDB && MACH_ASSERT */
876 #if MACH_KDP
877 if (i_bit(MP_KDP, my_word)) {
878 DBGLOG(cpu_handle,my_cpu,MP_KDP);
879 i_bit_clear(MP_KDP, my_word);
880 /* Ensure that the i386_kernel_state at the base of the
881 * current thread's stack (if any) is synchronized with the
882 * context at the moment of the interrupt, to facilitate
883 * access through the debugger.
884 * XXX 64-bit state?
885 */
886 sync_iss_to_iks(saved_state32(regs));
887 mp_kdp_wait(TRUE);
888 } else
889 #endif /* MACH_KDP */
890 if (i_bit(MP_TLB_FLUSH, my_word)) {
891 DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
892 i_bit_clear(MP_TLB_FLUSH, my_word);
893 pmap_update_interrupt();
894 } else if (i_bit(MP_AST, my_word)) {
895 DBGLOG(cpu_handle,my_cpu,MP_AST);
896 i_bit_clear(MP_AST, my_word);
897 ast_check(cpu_to_processor(my_cpu));
898 #if MACH_KDB
899 } else if (i_bit(MP_KDB, my_word)) {
900
901 i_bit_clear(MP_KDB, my_word);
902 current_cpu_datap()->cpu_kdb_is_slave++;
903 mp_kdb_wait();
904 current_cpu_datap()->cpu_kdb_is_slave--;
905 #endif /* MACH_KDB */
906 } else if (i_bit(MP_RENDEZVOUS, my_word)) {
907 DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
908 i_bit_clear(MP_RENDEZVOUS, my_word);
909 mp_rendezvous_action();
910 } else if (i_bit(MP_BROADCAST, my_word)) {
911 DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
912 i_bit_clear(MP_BROADCAST, my_word);
913 mp_broadcast_action();
914 } else if (i_bit(MP_CHUD, my_word)) {
915 DBGLOG(cpu_handle,my_cpu,MP_CHUD);
916 i_bit_clear(MP_CHUD, my_word);
917 chudxnu_cpu_signal_handler();
918 } else if (i_bit(MP_CALL, my_word)) {
919 DBGLOG(cpu_handle,my_cpu,MP_CALL);
920 i_bit_clear(MP_CALL, my_word);
921 mp_cpus_call_action();
922 }
923 } while (*my_word);
924
925 mp_enable_preemption();
926
927 }
928
929 /* We want this to show up in backtraces, hence marked noinline.
930 */
931 static int __attribute__((noinline))
932 NMIInterruptHandler(x86_saved_state_t *regs)
933 {
934 boolean_t state = ml_set_interrupts_enabled(FALSE);
935 sync_iss_to_iks_unconditionally(regs);
936 mp_kdp_wait(FALSE);
937 (void) ml_set_interrupts_enabled(state);
938 return 1;
939 }
940
941 #ifdef MP_DEBUG
942 extern int max_lock_loops;
943 int trappedalready = 0; /* (BRINGUP) */
944 #endif /* MP_DEBUG */
945
946 static void
947 i386_cpu_IPI(int cpu)
948 {
949 boolean_t state;
950
951 #ifdef MP_DEBUG
952 if(cpu_datap(cpu)->cpu_signals & 6) { /* (BRINGUP) */
953 kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu);
954 }
955 #endif /* MP_DEBUG */
956
957 #if MACH_KDB
958 #ifdef MP_DEBUG
959 if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) { /* (BRINGUP) */
960 if(kdb_cpu != cpu_number()) {
961 trappedalready = 1;
962 panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n",
963 cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu);
964 }
965 }
966 #endif /* MP_DEBUG */
967 #endif
968
969 /* Wait for previous interrupt to be delivered... */
970 #ifdef MP_DEBUG
971 int pending_busy_count = 0;
972 while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) {
973 if (++pending_busy_count > max_lock_loops)
974 panic("i386_cpu_IPI() deadlock\n");
975 #else
976 while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) {
977 #endif /* MP_DEBUG */
978 cpu_pause();
979 }
980
981 state = ml_set_interrupts_enabled(FALSE);
982 LAPIC_REG(ICRD) =
983 cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT;
984 LAPIC_REG(ICR) =
985 LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED;
986 (void) ml_set_interrupts_enabled(state);
987 }
988
989 /*
990 * cpu_interrupt is really just to be used by the scheduler to
991 * get a CPU's attention; it may not always issue an IPI. If an
992 * IPI is always needed, then use i386_cpu_IPI.
993 */
994 void
995 cpu_interrupt(int cpu)
996 {
997 if (smp_initialized
998 && pmCPUExitIdle(cpu_datap(cpu))) {
999 i386_cpu_IPI(cpu);
1000 }
1001 }
1002
1003 /*
1004 * Send a true NMI via the local APIC to the specified CPU.
1005 */
1006 static void
1007 cpu_NMI_interrupt(int cpu)
1008 {
1009 boolean_t state;
1010
1011 if (smp_initialized) {
1012 state = ml_set_interrupts_enabled(FALSE);
1013 /* Program the interrupt command register */
1014 LAPIC_REG(ICRD) =
1015 cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT;
1016 /* The vector is ignored in this case--the target CPU will enter on the
1017 * NMI vector.
1018 */
1019 LAPIC_REG(ICR) =
1020 LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_NMI;
1021 (void) ml_set_interrupts_enabled(state);
1022 }
1023 }
1024
1025 void
1026 i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
1027 {
1028 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1029 uint64_t tsc_timeout;
1030
1031
1032 if (!cpu_datap(cpu)->cpu_running)
1033 return;
1034
1035 if (event == MP_TLB_FLUSH)
1036 KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0);
1037
1038 DBGLOG(cpu_signal, cpu, event);
1039
1040 i_bit_set(event, signals);
1041 i386_cpu_IPI(cpu);
1042 if (mode == SYNC) {
1043 again:
1044 tsc_timeout = rdtsc64() + (1000*1000*1000);
1045 while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
1046 cpu_pause();
1047 }
1048 if (i_bit(event, signals)) {
1049 DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
1050 cpu, event);
1051 goto again;
1052 }
1053 }
1054 if (event == MP_TLB_FLUSH)
1055 KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0);
1056 }
1057
1058 /*
1059 * Send event to all running cpus.
1060 * Called with the topology locked.
1061 */
1062 void
1063 i386_signal_cpus(mp_event_t event, mp_sync_t mode)
1064 {
1065 unsigned int cpu;
1066 unsigned int my_cpu = cpu_number();
1067
1068 assert(hw_lock_held(&x86_topo_lock));
1069
1070 for (cpu = 0; cpu < real_ncpus; cpu++) {
1071 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1072 continue;
1073 i386_signal_cpu(cpu, event, mode);
1074 }
1075 }
1076
1077 /*
1078 * Return the number of running cpus.
1079 * Called with the topology locked.
1080 */
1081 int
1082 i386_active_cpus(void)
1083 {
1084 unsigned int cpu;
1085 unsigned int ncpus = 0;
1086
1087 assert(hw_lock_held(&x86_topo_lock));
1088
1089 for (cpu = 0; cpu < real_ncpus; cpu++) {
1090 if (cpu_datap(cpu)->cpu_running)
1091 ncpus++;
1092 }
1093 return(ncpus);
1094 }
1095
1096 /*
1097 * All-CPU rendezvous:
1098 * - CPUs are signalled,
1099 * - all execute the setup function (if specified),
1100 * - rendezvous (i.e. all cpus reach a barrier),
1101 * - all execute the action function (if specified),
1102 * - rendezvous again,
1103 * - execute the teardown function (if specified), and then
1104 * - resume.
1105 *
1106 * Note that the supplied external functions _must_ be reentrant and aware
1107 * that they are running in parallel and in an unknown lock context.
1108 */
1109
1110 static void
1111 mp_rendezvous_action(void)
1112 {
1113 boolean_t intrs_enabled;
1114
1115 /* setup function */
1116 if (mp_rv_setup_func != NULL)
1117 mp_rv_setup_func(mp_rv_func_arg);
1118
1119 intrs_enabled = ml_get_interrupts_enabled();
1120
1121 /* spin on entry rendezvous */
1122 atomic_incl(&mp_rv_entry, 1);
1123 while (mp_rv_entry < mp_rv_ncpus) {
1124 /* poll for pesky tlb flushes if interrupts disabled */
1125 if (!intrs_enabled)
1126 handle_pending_TLB_flushes();
1127 cpu_pause();
1128 }
1129 /* action function */
1130 if (mp_rv_action_func != NULL)
1131 mp_rv_action_func(mp_rv_func_arg);
1132 /* spin on exit rendezvous */
1133 atomic_incl(&mp_rv_exit, 1);
1134 while (mp_rv_exit < mp_rv_ncpus) {
1135 if (!intrs_enabled)
1136 handle_pending_TLB_flushes();
1137 cpu_pause();
1138 }
1139
1140 /* teardown function */
1141 if (mp_rv_teardown_func != NULL)
1142 mp_rv_teardown_func(mp_rv_func_arg);
1143
1144 /* Bump completion count */
1145 atomic_incl(&mp_rv_complete, 1);
1146 }
1147
1148 void
1149 mp_rendezvous(void (*setup_func)(void *),
1150 void (*action_func)(void *),
1151 void (*teardown_func)(void *),
1152 void *arg)
1153 {
1154
1155 if (!smp_initialized) {
1156 if (setup_func != NULL)
1157 setup_func(arg);
1158 if (action_func != NULL)
1159 action_func(arg);
1160 if (teardown_func != NULL)
1161 teardown_func(arg);
1162 return;
1163 }
1164
1165 /* obtain rendezvous lock */
1166 simple_lock(&mp_rv_lock);
1167
1168 /* set static function pointers */
1169 mp_rv_setup_func = setup_func;
1170 mp_rv_action_func = action_func;
1171 mp_rv_teardown_func = teardown_func;
1172 mp_rv_func_arg = arg;
1173
1174 mp_rv_entry = 0;
1175 mp_rv_exit = 0;
1176 mp_rv_complete = 0;
1177
1178 /*
1179 * signal other processors, which will call mp_rendezvous_action()
1180 * with interrupts disabled
1181 */
1182 simple_lock(&x86_topo_lock);
1183 mp_rv_ncpus = i386_active_cpus();
1184 i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
1185 simple_unlock(&x86_topo_lock);
1186
1187 /* call executor function on this cpu */
1188 mp_rendezvous_action();
1189
1190 /*
1191 * Spin for everyone to complete.
1192 * This is necessary to ensure that all processors have proceeded
1193 * from the exit barrier before we release the rendezvous structure.
1194 */
1195 while (mp_rv_complete < mp_rv_ncpus) {
1196 cpu_pause();
1197 }
1198
1199 /* Tidy up */
1200 mp_rv_setup_func = NULL;
1201 mp_rv_action_func = NULL;
1202 mp_rv_teardown_func = NULL;
1203 mp_rv_func_arg = NULL;
1204
1205 /* release lock */
1206 simple_unlock(&mp_rv_lock);
1207 }
1208
1209 void
1210 mp_rendezvous_break_lock(void)
1211 {
1212 simple_lock_init(&mp_rv_lock, 0);
1213 }
1214
1215 static void
1216 setup_disable_intrs(__unused void * param_not_used)
1217 {
1218 /* disable interrupts before the first barrier */
1219 boolean_t intr = ml_set_interrupts_enabled(FALSE);
1220
1221 current_cpu_datap()->cpu_iflag = intr;
1222 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
1223 }
1224
1225 static void
1226 teardown_restore_intrs(__unused void * param_not_used)
1227 {
1228 /* restore interrupt flag following MTRR changes */
1229 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
1230 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
1231 }
1232
1233 /*
1234 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
1235 * This is exported for use by kexts.
1236 */
1237 void
1238 mp_rendezvous_no_intrs(
1239 void (*action_func)(void *),
1240 void *arg)
1241 {
1242 mp_rendezvous(setup_disable_intrs,
1243 action_func,
1244 teardown_restore_intrs,
1245 arg);
1246 }
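
/*
 * Illustrative sketch, not part of the original source: run an action on
 * every cpu with interrupts disabled, in the style used by the MTRR code.
 * The example_* names are hypothetical; the action must be reentrant and
 * must not block, as noted above.
 */
#if 0	/* example only */
static void
example_rv_action(__unused void *arg)
{
	kprintf("cpu %d reached the rendezvous\n", get_cpu_number());
}

static void
example_rendezvous(void)
{
	mp_rendezvous_no_intrs(example_rv_action, NULL);
}
#endif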
1247
1248 void
1249 handle_pending_TLB_flushes(void)
1250 {
1251 volatile int *my_word = &current_cpu_datap()->cpu_signals;
1252
1253 if (i_bit(MP_TLB_FLUSH, my_word)) {
1254 DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH);
1255 i_bit_clear(MP_TLB_FLUSH, my_word);
1256 pmap_update_interrupt();
1257 }
1258 }
1259
1260 /*
1261 * This is called from cpu_signal_handler() to process an MP_CALL signal.
1262 */
1263 static void
1264 mp_cpus_call_action(void)
1265 {
1266 if (mp_rv_action_func != NULL)
1267 mp_rv_action_func(mp_rv_func_arg);
1268 atomic_incl(&mp_rv_complete, 1);
1269 }
1270
1271 /*
1272 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1273 * If the mode is SYNC, the function is called serially on the target cpus
1274 * in logical cpu order. If the mode is ASYNC, the function is called in
1275 * parallel over the specified cpus.
1276 * The action function may be NULL.
1277 * The cpu mask may include the local cpu. Offline cpus are ignored.
1278 * Return does not occur until the function has completed on all cpus.
1279 * The return value is the number of cpus on which the function was called.
1280 */
1281 cpu_t
1282 mp_cpus_call(
1283 cpumask_t cpus,
1284 mp_sync_t mode,
1285 void (*action_func)(void *),
1286 void *arg)
1287 {
1288 cpu_t cpu;
1289 boolean_t intrs_enabled = ml_get_interrupts_enabled();
1290 boolean_t call_self = FALSE;
1291
1292 if (!smp_initialized) {
1293 if ((cpus & CPUMASK_SELF) == 0)
1294 return 0;
1295 if (action_func != NULL) {
1296 (void) ml_set_interrupts_enabled(FALSE);
1297 action_func(arg);
1298 ml_set_interrupts_enabled(intrs_enabled);
1299 }
1300 return 1;
1301 }
1302
1303 /* obtain rendezvous lock */
1304 simple_lock(&mp_rv_lock);
1305
1306 /* Use the rendezvous data structures for this call */
1307 mp_rv_action_func = action_func;
1308 mp_rv_func_arg = arg;
1309 mp_rv_ncpus = 0;
1310 mp_rv_complete = 0;
1311
1312 simple_lock(&x86_topo_lock);
1313 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1314 if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
1315 !cpu_datap(cpu)->cpu_running)
1316 continue;
1317 if (cpu == (cpu_t) cpu_number()) {
1318 /*
1319 * We don't IPI ourself and if calling asynchronously,
1320 * we defer our call until we have signalled all others.
1321 */
1322 call_self = TRUE;
1323 if (mode == SYNC && action_func != NULL) {
1324 (void) ml_set_interrupts_enabled(FALSE);
1325 action_func(arg);
1326 ml_set_interrupts_enabled(intrs_enabled);
1327 }
1328 } else {
1329 /*
1330 * Bump count of other cpus called and signal this cpu.
1331 * Note: we signal asynchronously regardless of mode
1332 * because we wait on mp_rv_complete either here
1333 * (if mode == SYNC) or later (if mode == ASYNC).
1334 * While spinning, poll for TLB flushes if interrupts
1335 * are disabled.
1336 */
1337 mp_rv_ncpus++;
1338 i386_signal_cpu(cpu, MP_CALL, ASYNC);
1339 if (mode == SYNC) {
1340 simple_unlock(&x86_topo_lock);
1341 while (mp_rv_complete < mp_rv_ncpus) {
1342 if (!intrs_enabled)
1343 handle_pending_TLB_flushes();
1344 cpu_pause();
1345 }
1346 simple_lock(&x86_topo_lock);
1347 }
1348 }
1349 }
1350 simple_unlock(&x86_topo_lock);
1351
1352 /*
1353 * If calls are being made asynchronously,
1354 * make the local call now if needed, and then
1355 * wait for all other cpus to finish their calls.
1356 */
1357 if (mode == ASYNC) {
1358 if (call_self && action_func != NULL) {
1359 (void) ml_set_interrupts_enabled(FALSE);
1360 action_func(arg);
1361 ml_set_interrupts_enabled(intrs_enabled);
1362 }
1363 while (mp_rv_complete < mp_rv_ncpus) {
1364 if (!intrs_enabled)
1365 handle_pending_TLB_flushes();
1366 cpu_pause();
1367 }
1368 }
1369
1370 /* Determine the number of cpus called */
1371 cpu = mp_rv_ncpus + (call_self ? 1 : 0);
1372
1373 simple_unlock(&mp_rv_lock);
1374
1375 return cpu;
1376 }
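
/*
 * Illustrative sketch, not part of the original source: call a function
 * synchronously on cpu 0 and on the calling cpu, using the cpumask
 * helpers referenced above (cpu_to_cpumask(), CPUMASK_SELF). The
 * example_* names are hypothetical.
 */
#if 0	/* example only */
static void
example_call_action(__unused void *arg)
{
	kprintf("mp_cpus_call action on cpu %d\n", cpu_number());
}

static void
example_cpus_call(void)
{
	cpu_t ncalled;

	ncalled = mp_cpus_call(cpu_to_cpumask(0) | CPUMASK_SELF,
			       SYNC, example_call_action, NULL);
	kprintf("action ran on %d cpu(s)\n", ncalled);
}
#endif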
1377
1378 static void
1379 mp_broadcast_action(void)
1380 {
1381 /* call action function */
1382 if (mp_bc_action_func != NULL)
1383 mp_bc_action_func(mp_bc_func_arg);
1384
1385 /* if we're the last one through, wake up the instigator */
1386 if (atomic_decl_and_test((volatile long *)&mp_bc_count, 1))
1387 thread_wakeup(((event_t)(unsigned int *) &mp_bc_count));
1388 }
1389
1390 /*
1391 * mp_broadcast() runs a given function on all active cpus.
1392 * The caller blocks until the function has run on all cpus.
1393 * The caller will also block if there is another pending broadcast.
1394 */
1395 void
1396 mp_broadcast(
1397 void (*action_func)(void *),
1398 void *arg)
1399 {
1400 if (!smp_initialized) {
1401 if (action_func != NULL)
1402 action_func(arg);
1403 return;
1404 }
1405
1406 /* obtain broadcast lock */
1407 mutex_lock(&mp_bc_lock);
1408
1409 /* set static function pointers */
1410 mp_bc_action_func = action_func;
1411 mp_bc_func_arg = arg;
1412
1413 assert_wait(&mp_bc_count, THREAD_UNINT);
1414
1415 /*
1416 * signal other processors, which will call mp_broadcast_action()
1417 */
1418 simple_lock(&x86_topo_lock);
1419 mp_bc_ncpus = i386_active_cpus(); /* total including this cpu */
1420 mp_bc_count = mp_bc_ncpus;
1421 i386_signal_cpus(MP_BROADCAST, ASYNC);
1422
1423 /* call executor function on this cpu */
1424 mp_broadcast_action();
1425 simple_unlock(&x86_topo_lock);
1426
1427 /* block for all cpus to have run action_func */
1428 if (mp_bc_ncpus > 1)
1429 thread_block(THREAD_CONTINUE_NULL);
1430 else
1431 clear_wait(current_thread(), THREAD_AWAKENED);
1432
1433 /* release lock */
1434 mutex_unlock(&mp_bc_lock);
1435 }
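
/*
 * Illustrative sketch, not part of the original source: run a function
 * once on every active cpu and block until all have completed. The
 * example_* names are hypothetical.
 */
#if 0	/* example only */
static void
example_bc_action(__unused void *arg)
{
	kprintf("broadcast action on cpu %d\n", cpu_number());
}

static void
example_broadcast(void)
{
	mp_broadcast(example_bc_action, NULL);
}
#endif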
1436
1437 void
1438 i386_activate_cpu(void)
1439 {
1440 cpu_data_t *cdp = current_cpu_datap();
1441
1442 assert(!ml_get_interrupts_enabled());
1443
1444 if (!smp_initialized) {
1445 cdp->cpu_running = TRUE;
1446 return;
1447 }
1448
1449 simple_lock(&x86_topo_lock);
1450 cdp->cpu_running = TRUE;
1451 simple_unlock(&x86_topo_lock);
1452 }
1453
1454 void
1455 i386_deactivate_cpu(void)
1456 {
1457 cpu_data_t *cdp = current_cpu_datap();
1458
1459 assert(!ml_get_interrupts_enabled());
1460
1461 simple_lock(&x86_topo_lock);
1462 cdp->cpu_running = FALSE;
1463 simple_unlock(&x86_topo_lock);
1464
1465 /*
1466 * In case a rendezvous/broadcast/call was initiated to this cpu
1467 * before we cleared cpu_running, we must perform any actions due.
1468 */
1469 if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
1470 mp_rendezvous_action();
1471 if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
1472 mp_broadcast_action();
1473 if (i_bit(MP_CALL, &cdp->cpu_signals))
1474 mp_cpus_call_action();
1475 cdp->cpu_signals = 0; /* all clear */
1476 }
1477
1478 int pmsafe_debug = 1;
1479
1480 #if MACH_KDP
1481 volatile boolean_t mp_kdp_trap = FALSE;
1482 volatile unsigned long mp_kdp_ncpus;
1483 boolean_t mp_kdp_state;
1484
1485
1486 void
1487 mp_kdp_enter(void)
1488 {
1489 unsigned int cpu;
1490 unsigned int ncpus;
1491 unsigned int my_cpu = cpu_number();
1492 uint64_t tsc_timeout;
1493
1494 DBG("mp_kdp_enter()\n");
1495
1496 /*
1497 * Here to enter the debugger.
1498 * In case of races, only one cpu is allowed to enter kdp after
1499 * stopping others.
1500 */
1501 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1502 simple_lock(&mp_kdp_lock);
1503
1504 if (pmsafe_debug)
1505 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1506
1507 while (mp_kdp_trap) {
1508 simple_unlock(&mp_kdp_lock);
1509 DBG("mp_kdp_enter() race lost\n");
1510 mp_kdp_wait(TRUE);
1511 simple_lock(&mp_kdp_lock);
1512 }
1513 mp_kdp_ncpus = 1; /* self */
1514 mp_kdp_trap = TRUE;
1515 simple_unlock(&mp_kdp_lock);
1516
1517 /*
1518 * Deliver a nudge to other cpus, counting how many
1519 */
1520 DBG("mp_kdp_enter() signaling other processors\n");
1521 if (force_immediate_debugger_NMI == FALSE) {
1522 for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
1523 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1524 continue;
1525 ncpus++;
1526 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1527 }
1528 /*
1529 * Wait for other processors to synchronize
1530 */
1531 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1532
1533 /*
1534 * This timeout is rather arbitrary; we don't want to NMI
1535 * processors that are executing at potentially
1536 * "unsafe-to-interrupt" points such as the trampolines,
1537 * but neither do we want to lose state by waiting too long.
1538 */
1539 tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000);
1540
1541 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1542 /*
1543 * A TLB shootdown request may be pending--this would
1544 * result in the requesting processor waiting in
1545 * PMAP_UPDATE_TLBS() until this processor deals with it.
1546 * Process it, so it can now enter mp_kdp_wait()
1547 */
1548 handle_pending_TLB_flushes();
1549 cpu_pause();
1550 }
1551 /* If we've timed out, and some processor(s) are still unresponsive,
1552 * interrupt them with an NMI via the local APIC.
1553 */
1554 if (mp_kdp_ncpus != ncpus) {
1555 for (cpu = 0; cpu < real_ncpus; cpu++) {
1556 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1557 continue;
1558 if (cpu_signal_pending(cpu, MP_KDP))
1559 cpu_NMI_interrupt(cpu);
1560 }
1561 }
1562 }
1563 else
1564 for (cpu = 0; cpu < real_ncpus; cpu++) {
1565 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1566 continue;
1567 cpu_NMI_interrupt(cpu);
1568 }
1569
1570 DBG("mp_kdp_enter() %u processors done %s\n",
1571 mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1572
1573 postcode(MP_KDP_ENTER);
1574 }
1575
1576 static boolean_t
1577 cpu_signal_pending(int cpu, mp_event_t event)
1578 {
1579 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1580 boolean_t retval = FALSE;
1581
1582 if (i_bit(event, signals))
1583 retval = TRUE;
1584 return retval;
1585 }
1586
1587
1588 static void
1589 mp_kdp_wait(boolean_t flush)
1590 {
1591 DBG("mp_kdp_wait()\n");
1592 /* If an I/O port has been specified as a debugging aid, issue a read */
1593 panic_io_port_read();
1594
1595 /* If we've trapped due to a machine-check, save MCA registers */
1596 mca_check_save();
1597
1598 if (pmsafe_debug)
1599 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1600
1601 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1602 while (mp_kdp_trap) {
1603 /*
1604 * A TLB shootdown request may be pending--this would result
1605 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
1606 * until this processor handles it.
1607 * Process it, so it can now enter mp_kdp_wait()
1608 */
1609 if (flush)
1610 handle_pending_TLB_flushes();
1611 cpu_pause();
1612 }
1613
1614 if (pmsafe_debug)
1615 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1616
1617 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1618 DBG("mp_kdp_wait() done\n");
1619 }
1620
1621 void
1622 mp_kdp_exit(void)
1623 {
1624 DBG("mp_kdp_exit()\n");
1625 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1626 mp_kdp_trap = FALSE;
1627 __asm__ volatile("mfence");
1628
1629 /* Wait for other processors to stop spinning. XXX needs timeout */
1630 DBG("mp_kdp_exit() waiting for processors to resume\n");
1631 while (mp_kdp_ncpus > 0) {
1632 /*
1633 * a TLB shootdown request may be pending... this would result in the requesting
1634 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1635 * Process it, so it can now enter mp_kdp_wait()
1636 */
1637 handle_pending_TLB_flushes();
1638
1639 cpu_pause();
1640 }
1641
1642 if (pmsafe_debug)
1643 pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1644
1645 DBG("mp_kdp_exit() done\n");
1646 (void) ml_set_interrupts_enabled(mp_kdp_state);
1647 postcode(0);
1648 }
1649 #endif /* MACH_KDP */
1650
1651 /*ARGSUSED*/
1652 void
1653 init_ast_check(
1654 __unused processor_t processor)
1655 {
1656 }
1657
1658 void
1659 cause_ast_check(
1660 processor_t processor)
1661 {
1662 int cpu = PROCESSOR_DATA(processor, slot_num);
1663
1664 if (cpu != cpu_number()) {
1665 i386_signal_cpu(cpu, MP_AST, ASYNC);
1666 }
1667 }
1668
1669 #if MACH_KDB
1670 /*
1671 * invoke kdb on slave processors
1672 */
1673
1674 void
1675 remote_kdb(void)
1676 {
1677 unsigned int my_cpu = cpu_number();
1678 unsigned int cpu;
1679 int kdb_ncpus;
1680 uint64_t tsc_timeout = 0;
1681
1682 mp_kdb_trap = TRUE;
1683 mp_kdb_ncpus = 1;
1684 for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
1685 if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
1686 continue;
1687 kdb_ncpus++;
1688 i386_signal_cpu(cpu, MP_KDB, ASYNC);
1689 }
1690 DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus);
1691
1692 tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000);
1693
1694 while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) {
1695 /*
1696 * a TLB shootdown request may be pending... this would result in the requesting
1697 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1698 * Process it, so it can now enter mp_kdp_wait()
1699 */
1700 handle_pending_TLB_flushes();
1701
1702 cpu_pause();
1703 }
1704 DBG("remote_kdb() %d processors done %s\n",
1705 mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out");
1706 }
1707
1708 static void
1709 mp_kdb_wait(void)
1710 {
1711 DBG("mp_kdb_wait()\n");
1712
1713 /* If an I/O port has been specified as a debugging aid, issue a read */
1714 panic_io_port_read();
1715
1716 atomic_incl(&mp_kdb_ncpus, 1);
1717 while (mp_kdb_trap) {
1718 /*
1719 * a TLB shootdown request may be pending... this would result in the requesting
1720 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1721 * Process it, so it can now enter mp_kdp_wait()
1722 */
1723 handle_pending_TLB_flushes();
1724
1725 cpu_pause();
1726 }
1727 atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
1728 DBG("mp_kdb_wait() done\n");
1729 }
1730
1731 /*
1732 * Clear kdb interrupt
1733 */
1734
1735 void
1736 clear_kdb_intr(void)
1737 {
1738 mp_disable_preemption();
1739 i_bit_clear(MP_KDB, &current_cpu_datap()->cpu_signals);
1740 mp_enable_preemption();
1741 }
1742
1743 void
1744 mp_kdb_exit(void)
1745 {
1746 DBG("mp_kdb_exit()\n");
1747 atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
1748 mp_kdb_trap = FALSE;
1749 __asm__ volatile("mfence");
1750
1751 while (mp_kdb_ncpus > 0) {
1752 /*
1753 * a TLB shootdown request may be pending... this would result in the requesting
1754 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1755 * Process it, so it can now enter mp_kdp_wait()
1756 */
1757 handle_pending_TLB_flushes();
1758
1759 cpu_pause();
1760 }
1761
1762 DBG("mp_kdb_exit() done\n");
1763 }
1764
1765 #endif /* MACH_KDB */
1766
1767 /*
1768 * i386_init_slave() is called from pstart.
1769 * We're in the cpu's interrupt stack with interrupts disabled.
1770 * At this point we are in legacy mode. We need to switch on IA32e
1771 * if the mode is set to 64-bits.
1772 */
1773 void
1774 i386_init_slave(void)
1775 {
1776 postcode(I386_INIT_SLAVE);
1777
1778 /* Ensure that caching and write-through are enabled */
1779 set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));
1780
1781 DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
1782 get_cpu_number(), get_cpu_phys_number());
1783
1784 assert(!ml_get_interrupts_enabled());
1785
1786 cpu_mode_init(current_cpu_datap());
1787
1788 mca_cpu_init();
1789
1790 lapic_init();
1791 LAPIC_DUMP();
1792 LAPIC_CPU_MAP_DUMP();
1793
1794 init_fpu();
1795
1796 mtrr_update_cpu();
1797
1798 /* resume VT operation */
1799 vmx_resume();
1800
1801 pat_init();
1802
1803 cpu_thread_init(); /* not strictly necessary */
1804
1805 cpu_init(); /* Sets cpu_running which starter cpu waits for */
1806
1807 slave_main();
1808
1809 panic("i386_init_slave() returned from slave_main()");
1810 }
1811
1812 void
1813 slave_machine_init(void)
1814 {
1815 /*
1816 * Here in process context, but with interrupts disabled.
1817 */
1818 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1819
1820 clock_init();
1821
1822 cpu_machine_init(); /* Interrupts enabled hereafter */
1823 }
1824
1825 #undef cpu_number()
1826 int cpu_number(void)
1827 {
1828 return get_cpu_number();
1829 }
1830
1831 #if MACH_KDB
1832 #include <ddb/db_output.h>
1833
1834 #define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */
1835
1836
1837 #if TRAP_DEBUG
1838 #define MTRAPS 100
1839 struct mp_trap_hist_struct {
1840 unsigned char type;
1841 unsigned char data[5];
1842 } trap_hist[MTRAPS], *cur_trap_hist = trap_hist,
1843 *max_trap_hist = &trap_hist[MTRAPS];
1844
1845 void db_trap_hist(void);
1846
1847 /*
1848 * SPL:
1849 * 1: new spl
1850 * 2: old spl
1851 * 3: new tpr
1852 * 4: old tpr
1853 * INT:
1854 * 1: int vec
1855 * 2: old spl
1856 * 3: new spl
1857 * 4: post eoi tpr
1858 * 5: exit tpr
1859 */
1860
1861 void
1862 db_trap_hist(void)
1863 {
1864 int i,j;
1865 for(i=0;i<MTRAPS;i++)
1866 if (trap_hist[i].type == 1 || trap_hist[i].type == 2) {
1867 db_printf("%s%s",
1868 (&trap_hist[i]>=cur_trap_hist)?"*":" ",
1869 (trap_hist[i].type == 1)?"SPL":"INT");
1870 for(j=0;j<5;j++)
1871 db_printf(" %02x", trap_hist[i].data[j]);
1872 db_printf("\n");
1873 }
1874
1875 }
1876 #endif /* TRAP_DEBUG */
1877 #endif /* MACH_KDB */
1878