apple/xnu (xnu-1504.15.3): osfmk/i386/trap.c
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * Hardware trap/fault handler.
61 */
62
63 #include <mach_kdb.h>
64 #include <mach_kgdb.h>
65 #include <mach_kdp.h>
66 #include <mach_ldebug.h>
67
68 #include <types.h>
69 #include <i386/eflags.h>
70 #include <i386/trap.h>
71 #include <i386/pmap.h>
72 #include <i386/fpu.h>
73 #include <i386/misc_protos.h> /* panic_io_port_read() */
74 #include <i386/lapic.h>
75
76 #include <mach/exception.h>
77 #include <mach/kern_return.h>
78 #include <mach/vm_param.h>
79 #include <mach/i386/thread_status.h>
80
81 #include <vm/vm_kern.h>
82 #include <vm/vm_fault.h>
83
84 #include <kern/kern_types.h>
85 #include <kern/processor.h>
86 #include <kern/thread.h>
87 #include <kern/task.h>
88 #include <kern/sched.h>
89 #include <kern/sched_prim.h>
90 #include <kern/exception.h>
91 #include <kern/spl.h>
92 #include <kern/misc_protos.h>
93 #include <kern/debug.h>
94
95 #include <sys/kdebug.h>
96
97 #if MACH_KGDB
98 #include <kgdb/kgdb_defs.h>
99 #endif /* MACH_KGDB */
100
101 #if MACH_KDB
102 #include <debug.h>
103 #include <ddb/db_watch.h>
104 #include <ddb/db_run.h>
105 #include <ddb/db_break.h>
106 #include <ddb/db_trap.h>
107 #endif /* MACH_KDB */
108
109 #include <string.h>
110
111 #include <i386/postcode.h>
112 #include <i386/mp_desc.h>
113 #include <i386/proc_reg.h>
114 #if CONFIG_MCA
115 #include <i386/machine_check.h>
116 #endif
117 #include <mach/i386/syscall_sw.h>
118
119 #include <libkern/OSDebug.h>
120
121 extern void throttle_lowpri_io(boolean_t);
122
123
124 /*
125 * Forward declarations
126 */
127 static void user_page_fault_continue(kern_return_t kret);
128 #ifdef __i386__
129 static void panic_trap(x86_saved_state32_t *saved_state);
130 static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip);
131 static void panic_64(x86_saved_state_t *, int, const char *, boolean_t);
132 #else
133 static void panic_trap(x86_saved_state64_t *saved_state);
134 static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip);
135 #endif
136
137 volatile perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */
138 volatile perfCallback perfASTHook = NULL; /* Pointer to CHUD AST hook routine */
139
140 #if CONFIG_DTRACE
141 /* See <rdar://problem/4613924> */
142 perfCallback tempDTraceTrapHook = NULL; /* Pointer to DTrace fbt trap hook routine */
143
144 extern boolean_t dtrace_tally_fault(user_addr_t);
145 #endif
146
147 void
148 thread_syscall_return(
149 kern_return_t ret)
150 {
151 thread_t thr_act = current_thread();
152 boolean_t is_mach;
153 int code;
154
155
156 if (thread_is_64bit(thr_act)) {
157 x86_saved_state64_t *regs;
158
159 regs = USER_REGS64(thr_act);
160
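/*
 * For 64-bit threads the syscall number and class are packed into %rax:
 * the low bits (SYSCALL_NUMBER_MASK) carry the number, while the high
 * bits (SYSCALL_CLASS_MASK) identify it as a Mach trap or a Unix syscall.
 */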
161 code = (int) (regs->rax & SYSCALL_NUMBER_MASK);
162 is_mach = (regs->rax & SYSCALL_CLASS_MASK)
163 == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT);
164 if (kdebug_enable && is_mach) {
165 /* Mach trap */
166 KERNEL_DEBUG_CONSTANT(
167 MACHDBG_CODE(DBG_MACH_EXCP_SC,code)|DBG_FUNC_END,
168 ret, 0, 0, 0, 0);
169 }
170 regs->rax = ret;
171 #if DEBUG
172 if (is_mach)
173 DEBUG_KPRINT_SYSCALL_MACH(
174 "thread_syscall_return: 64-bit mach ret=%u\n",
175 ret);
176 else
177 DEBUG_KPRINT_SYSCALL_UNIX(
178 "thread_syscall_return: 64-bit unix ret=%u\n",
179 ret);
180 #endif
181 } else {
182 x86_saved_state32_t *regs;
183
184 regs = USER_REGS32(thr_act);
185
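/*
 * For 32-bit threads the convention is simpler: Mach traps are invoked
 * with negative numbers in %eax, Unix syscalls with positive ones.
 */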
186 code = ((int) regs->eax);
187 is_mach = (code < 0);
188 if (kdebug_enable && is_mach) {
189 /* Mach trap */
190 KERNEL_DEBUG_CONSTANT(
191 MACHDBG_CODE(DBG_MACH_EXCP_SC,-code)|DBG_FUNC_END,
192 ret, 0, 0, 0, 0);
193 }
194 regs->eax = ret;
195 #if DEBUG
196 if (is_mach)
197 DEBUG_KPRINT_SYSCALL_MACH(
198 "thread_syscall_return: 32-bit mach ret=%u\n",
199 ret);
200 else
201 DEBUG_KPRINT_SYSCALL_UNIX(
202 "thread_syscall_return: 32-bit unix ret=%u\n",
203 ret);
204 #endif
205 }
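/*
 * Apply any pending low-priority I/O throttling before returning
 * to user space.
 */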
206 throttle_lowpri_io(TRUE);
207
208 thread_exception_return();
209 /*NOTREACHED*/
210 }
211
212
213 #if MACH_KDB
214 boolean_t debug_all_traps_with_kdb = FALSE;
215 extern struct db_watchpoint *db_watchpoint_list;
216 extern boolean_t db_watchpoints_inserted;
217 extern boolean_t db_breakpoints_inserted;
218
219 void
220 thread_kdb_return(void)
221 {
222 thread_t thr_act = current_thread();
223 x86_saved_state_t *iss = USER_STATE(thr_act);
224
225
226 if (is_saved_state64(iss)) {
227 x86_saved_state64_t *regs;
228
229 regs = saved_state64(iss);
230
231 if (kdb_trap(regs->isf.trapno, (int)regs->isf.err, (void *)regs)) {
232 thread_exception_return();
233 /*NOTREACHED*/
234 }
235
236 } else {
237 x86_saved_state32_t *regs;
238
239 regs = saved_state32(iss);
240
241 if (kdb_trap(regs->trapno, regs->err, (void *)regs)) {
242 thread_exception_return();
243 /*NOTREACHED*/
244 }
245 }
246 }
247
248 #endif /* MACH_KDB */
249
250 void
251 user_page_fault_continue(
252 kern_return_t kr)
253 {
254 thread_t thread = current_thread();
255 ast_t *myast;
256 boolean_t intr;
257 user_addr_t vaddr;
258
259
260 #if MACH_KDB
261 x86_saved_state_t *regs = USER_STATE(thread);
262 int err;
263 int trapno;
264
265 assert((is_saved_state32(regs) && !thread_is_64bit(thread)) ||
266 (is_saved_state64(regs) && thread_is_64bit(thread)));
267 #endif
268
269 if (thread_is_64bit(thread)) {
270 x86_saved_state64_t *uregs;
271
272 uregs = USER_REGS64(thread);
273
274 #if MACH_KDB
275 trapno = uregs->isf.trapno;
276 err = (int)uregs->isf.err;
277 #endif
278 vaddr = (user_addr_t)uregs->cr2;
279 } else {
280 x86_saved_state32_t *uregs;
281
282 uregs = USER_REGS32(thread);
283
284 #if MACH_KDB
285 trapno = uregs->trapno;
286 err = uregs->err;
287 #endif
288 vaddr = uregs->cr2;
289 }
290
291 if ((kr == KERN_SUCCESS) || (kr == KERN_ABORTED)) {
292 #if MACH_KDB
293 if (!db_breakpoints_inserted) {
294 db_set_breakpoints();
295 }
296 if (db_watchpoint_list &&
297 db_watchpoints_inserted &&
298 (err & T_PF_WRITE) &&
299 db_find_watchpoint(thread->map,
300 (vm_offset_t)vaddr,
301 saved_state32(regs)))
302 kdb_trap(T_WATCHPOINT, 0, saved_state32(regs));
303 #endif /* MACH_KDB */
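/*
 * The fault was resolved (or the thread was interrupted): drain any
 * pending ASTs with interrupts disabled, so none can be posted between
 * the check and ast_taken(), then return to user mode.
 */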
304 intr = ml_set_interrupts_enabled(FALSE);
305 myast = ast_pending();
306 while (*myast & AST_ALL) {
307 ast_taken(AST_ALL, intr);
308 ml_set_interrupts_enabled(FALSE);
309 myast = ast_pending();
310 }
311 ml_set_interrupts_enabled(intr);
312
313 thread_exception_return();
314 /*NOTREACHED*/
315 }
316
317 #if MACH_KDB
318 if (debug_all_traps_with_kdb &&
319 kdb_trap(trapno, err, saved_state32(regs))) {
320 thread_exception_return();
321 /*NOTREACHED*/
322 }
323 #endif /* MACH_KDB */
324
325
326 i386_exception(EXC_BAD_ACCESS, kr, vaddr);
327 /*NOTREACHED*/
328 }
329
330 /*
331 * Fault recovery in copyin/copyout routines.
332 */
333 struct recovery {
334 uintptr_t fault_addr;
335 uintptr_t recover_addr;
336 };
337
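/*
 * Each entry maps an instruction address that may fault to a recovery
 * address; kernel_trap() consults this table for unresolved page faults
 * and general protection faults and, on a match, resumes execution at
 * the recovery address.
 */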
338 extern struct recovery recover_table[];
339 extern struct recovery recover_table_end[];
340
341 const char * trap_type[] = {TRAP_NAMES};
342 unsigned TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]);
343
344 #if defined(__x86_64__) && DEBUG
345 static void
346 print_state(x86_saved_state64_t *saved_state)
347 {
348 kprintf("current_cpu_datap() 0x%lx\n", (uintptr_t)current_cpu_datap());
349 kprintf("Current GS base MSR 0x%llx\n", rdmsr64(MSR_IA32_GS_BASE));
350 kprintf("Kernel GS base MSR 0x%llx\n", rdmsr64(MSR_IA32_KERNEL_GS_BASE));
351 kprintf("state at 0x%lx:\n", (uintptr_t) saved_state);
352
353 kprintf(" rdi 0x%llx\n", saved_state->rdi);
354 kprintf(" rsi 0x%llx\n", saved_state->rsi);
355 kprintf(" rdx 0x%llx\n", saved_state->rdx);
356 kprintf(" r10 0x%llx\n", saved_state->r10);
357 kprintf(" r8 0x%llx\n", saved_state->r8);
358 kprintf(" r9 0x%llx\n", saved_state->r9);
359 kprintf(" v_arg6 0x%llx\n", saved_state->v_arg6);
360 kprintf(" v_arg7 0x%llx\n", saved_state->v_arg7);
361 kprintf(" v_arg8 0x%llx\n", saved_state->v_arg8);
362
363 kprintf(" cr2 0x%llx\n", saved_state->cr2);
364 kprintf("real cr2 0x%lx\n", get_cr2());
365 kprintf(" r15 0x%llx\n", saved_state->r15);
366 kprintf(" r14 0x%llx\n", saved_state->r14);
367 kprintf(" r13 0x%llx\n", saved_state->r13);
368 kprintf(" r12 0x%llx\n", saved_state->r12);
369 kprintf(" r11 0x%llx\n", saved_state->r11);
370 kprintf(" rbp 0x%llx\n", saved_state->rbp);
371 kprintf(" rbx 0x%llx\n", saved_state->rbx);
372 kprintf(" rcx 0x%llx\n", saved_state->rcx);
373 kprintf(" rax 0x%llx\n", saved_state->rax);
374
375 kprintf(" gs 0x%x\n", saved_state->gs);
376 kprintf(" fs 0x%x\n", saved_state->fs);
377
378 kprintf(" isf.trapno 0x%x\n", saved_state->isf.trapno);
379 kprintf(" isf._pad 0x%x\n", saved_state->isf._pad);
380 kprintf(" isf.trapfn 0x%llx\n", saved_state->isf.trapfn);
381 kprintf(" isf.err 0x%llx\n", saved_state->isf.err);
382 kprintf(" isf.rip 0x%llx\n", saved_state->isf.rip);
383 kprintf(" isf.cs 0x%llx\n", saved_state->isf.cs);
384 kprintf(" isf.rflags 0x%llx\n", saved_state->isf.rflags);
385 kprintf(" isf.rsp 0x%llx\n", saved_state->isf.rsp);
386 kprintf(" isf.ss 0x%llx\n", saved_state->isf.ss);
387 }
388 /*
389 * K64 debug - fatal handler for debug code in the trap vectors.
390 */
391 extern void
392 panic_idt64(x86_saved_state_t *rsp);
393 void
394 panic_idt64(x86_saved_state_t *rsp)
395 {
396 print_state(saved_state64(rsp));
397 panic("panic_idt64");
398 }
399 #endif
400
401
402
403 /*
404 * Non-zero indicates the interrupt latency assert is enabled, with the cap
405 * expressed in absolute time units.
406 */
407
408 uint64_t interrupt_latency_cap = 0;
409 boolean_t ilat_assert = FALSE;
410
411 void
412 interrupt_latency_tracker_setup(void) {
413 uint32_t ilat_cap_us;
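/*
 * The interrupt_latency_cap_us boot-arg is given in microseconds; convert
 * it to nanoseconds and then to absolute (Mach) time units. If it is
 * absent, fall back to LockTimeOut.
 */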
414 if (PE_parse_boot_argn("interrupt_latency_cap_us", &ilat_cap_us, sizeof(ilat_cap_us))) {
415 interrupt_latency_cap = ilat_cap_us * NSEC_PER_USEC;
416 nanoseconds_to_absolutetime(interrupt_latency_cap, &interrupt_latency_cap);
417 } else {
418 interrupt_latency_cap = LockTimeOut;
419 }
420 PE_parse_boot_argn("-interrupt_latency_assert_enable", &ilat_assert, sizeof(ilat_assert));
421 }
422
423 void interrupt_reset_latency_stats(void) {
424 uint32_t i;
425 for (i = 0; i < real_ncpus; i++) {
426 cpu_data_ptr[i]->cpu_max_observed_int_latency =
427 cpu_data_ptr[i]->cpu_max_observed_int_latency_vector = 0;
428 }
429 }
430
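/*
 * Report the largest interrupt latency observed across all CPUs as
 * "<cpu> <vector> <latency>" in hex.
 */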
431 void interrupt_populate_latency_stats(char *buf, unsigned bufsize) {
432 uint32_t i, tcpu = ~0;
433 uint64_t cur_max = 0;
434
435 for (i = 0; i < real_ncpus; i++) {
436 if (cur_max < cpu_data_ptr[i]->cpu_max_observed_int_latency) {
437 cur_max = cpu_data_ptr[i]->cpu_max_observed_int_latency;
438 tcpu = i;
439 }
440 }
441
442 if (tcpu < real_ncpus)
443 snprintf(buf, bufsize, "0x%x 0x%x 0x%llx", tcpu, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency_vector, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency);
444 }
445
446
447 extern void PE_incoming_interrupt(int interrupt);
448
449 /*
450 * Handle interrupts:
451 * - local APIC interrupts (IPIs, timers, etc) are handled by the kernel,
452 * - device interrupts go to the platform expert.
453 */
454 void
455 interrupt(x86_saved_state_t *state)
456 {
457 uint64_t rip;
458 uint64_t rsp;
459 int interrupt_num;
460 boolean_t user_mode = FALSE;
461 int cnum = cpu_number();
462
463 if (is_saved_state64(state) == TRUE) {
464 x86_saved_state64_t *state64;
465
466 state64 = saved_state64(state);
467 rip = state64->isf.rip;
468 rsp = state64->isf.rsp;
469 interrupt_num = state64->isf.trapno;
470 #ifdef __x86_64__
471 if(state64->isf.cs & 0x03)
472 #endif
473 user_mode = TRUE;
474 } else {
475 x86_saved_state32_t *state32;
476
477 state32 = saved_state32(state);
478 if (state32->cs & 0x03)
479 user_mode = TRUE;
480 rip = state32->eip;
481 rsp = state32->uesp;
482 interrupt_num = state32->trapno;
483 }
484
485 KERNEL_DEBUG_CONSTANT(
486 MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START,
487 interrupt_num, (long) rip, user_mode, 0, 0);
488
489 /*
490 * Handle local APIC interrupts
491 * else call platform expert for devices.
492 */
493 if (!lapic_interrupt(interrupt_num, state)) {
494 PE_incoming_interrupt(interrupt_num);
495 }
496
497 KERNEL_DEBUG_CONSTANT(
498 MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END,
499 0, 0, 0, 0, 0);
500
501 if (cpu_data_ptr[cnum]->cpu_nested_istack) {
502 cpu_data_ptr[cnum]->cpu_nested_istack_events++;
503 }
504 else {
505 uint64_t int_latency = mach_absolute_time() - cpu_data_ptr[cnum]->cpu_int_event_time;
506 if (ilat_assert && (int_latency > interrupt_latency_cap) && !machine_timeout_suspended()) {
507 panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x", interrupt_num, int_latency, cpu_data_ptr[cnum]->cpu_prior_signals);
508 }
509 if (int_latency > cpu_data_ptr[cnum]->cpu_max_observed_int_latency) {
510 cpu_data_ptr[cnum]->cpu_max_observed_int_latency = int_latency;
511 cpu_data_ptr[cnum]->cpu_max_observed_int_latency_vector = interrupt_num;
512 }
513 }
514
515
516 /*
517 * Having serviced the interrupt first, look at the interrupted stack depth.
518 */
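/*
 * For interrupts taken from kernel mode, measure how far the interrupted
 * stack pointer sits below the top of the kernel stack and record a new
 * maximum (with a tracepoint) if this is the deepest seen so far.
 */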
519 if (!user_mode) {
520 uint64_t depth = cpu_data_ptr[cnum]->cpu_kernel_stack
521 + sizeof(struct x86_kernel_state)
522 + sizeof(struct i386_exception_link *)
523 - rsp;
524 if (depth > kernel_stack_depth_max) {
525 kernel_stack_depth_max = (vm_offset_t)depth;
526 KERNEL_DEBUG_CONSTANT(
527 MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DEPTH),
528 (long) depth, (long) rip, 0, 0, 0);
529 }
530 }
531 }
532
533 static inline void
534 reset_dr7(void)
535 {
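/*
 * 0x400 is the architectural reset value of DR7: bit 10 is reserved and
 * always reads as 1, and all breakpoint enable bits are clear.
 */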
536 long dr7 = 0x400; /* magic dr7 reset value; 32 bit on i386, 64 bit on x86_64 */
537 __asm__ volatile("mov %0,%%dr7" : : "r" (dr7));
538 }
539 #if MACH_KDP
540 unsigned kdp_has_active_watchpoints = 0;
541 #define NO_WATCHPOINTS (!kdp_has_active_watchpoints)
542 #else
543 #define NO_WATCHPOINTS 1
544 #endif
545 /*
546 * Trap from kernel mode. Only page-fault errors are recoverable,
547 * and then only in special circumstances. All other errors are
548 * fatal, ending in panic_trap() which does not return.
549 */
550
551 void
552 kernel_trap(
553 x86_saved_state_t *state)
554 {
555 #ifdef __i386__
556 x86_saved_state32_t *saved_state;
557 #else
558 x86_saved_state64_t *saved_state;
559 #endif
560 int code;
561 user_addr_t vaddr;
562 int type;
563 vm_map_t map = 0; /* protected by T_PAGE_FAULT */
564 kern_return_t result = KERN_FAILURE;
565 thread_t thread;
566 ast_t *myast;
567 boolean_t intr;
568 vm_prot_t prot;
569 struct recovery *rp;
570 vm_offset_t kern_ip;
571 #if NCOPY_WINDOWS > 0
572 int fault_in_copy_window = -1;
573 #endif
574 int is_user = 0;
575 #if MACH_KDB
576 pt_entry_t *pte;
577 #endif /* MACH_KDB */
578
579 thread = current_thread();
580
581 #ifdef __i386__
582 if (is_saved_state64(state)) {
583 panic_64(state, 0, "Kernel trap with 64-bit state", FALSE);
584 }
585 saved_state = saved_state32(state);
586 vaddr = (user_addr_t)saved_state->cr2;
587 type = saved_state->trapno;
588 code = saved_state->err & 0xffff;
589 intr = (saved_state->efl & EFL_IF) != 0; /* state of ints at trap */
590 kern_ip = (vm_offset_t)saved_state->eip;
591 #else
592 if (is_saved_state32(state))
593 panic("kernel_trap(%p) with 32-bit state", state);
594 saved_state = saved_state64(state);
595 vaddr = (user_addr_t)saved_state->cr2;
596 type = saved_state->isf.trapno;
597 code = (int)(saved_state->isf.err & 0xffff);
598 intr = (saved_state->isf.rflags & EFL_IF) != 0; /* state of ints at trap */
599 kern_ip = (vm_offset_t)saved_state->isf.rip;
600 #endif
601
602 myast = ast_pending();
603
604 perfCallback fn = perfASTHook;
605 if (fn) {
606 if (*myast & AST_CHUD_ALL)
607 fn(type, NULL, 0, 0);
608 } else
609 *myast &= ~AST_CHUD_ALL;
610
611 /*
612 * Is there a hook?
613 */
614 fn = perfTrapHook;
615 if (fn) {
616 if (fn(type, NULL, 0, 0) == KERN_SUCCESS) {
617 /*
618 * If it succeeds, we are done...
619 */
620 return;
621 }
622 }
623
624 #if CONFIG_DTRACE
625 if (tempDTraceTrapHook) {
626 if (tempDTraceTrapHook(type, state, 0, 0) == KERN_SUCCESS) {
627 /*
628 * If it succeeds, we are done...
629 */
630 return;
631 }
632 }
633 #endif /* CONFIG_DTRACE */
634
635 /*
636 * We come here with interrupts off as we don't want to recurse
637 * on preemption below, but we do want to re-enable interrupts
638 * as soon as we possibly can to hold latency down.
639 */
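/*
 * T_PREEMPT is a software interrupt raised by kernel_preempt_check()
 * below (via "int $0xff"); take the preemption AST here, before
 * re-enabling interrupts.
 */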
640 if (T_PREEMPT == type) {
641 ast_taken(AST_PREEMPTION, FALSE);
642
643 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
644 0, 0, 0, kern_ip, 0);
645 return;
646 }
647
648 if (T_PAGE_FAULT == type) {
649 /*
650 * assume we're faulting in the kernel map
651 */
652 map = kernel_map;
653
654 if (thread != THREAD_NULL && thread->map != kernel_map) {
655 #if NCOPY_WINDOWS > 0
656 vm_offset_t copy_window_base;
657 vm_offset_t kvaddr;
658 int window_index;
659
660 kvaddr = (vm_offset_t)vaddr;
661 /*
662 * must determine if fault occurred in
663 * the copy window while pre-emption is
664 * disabled for this processor so that
665 * we only need to look at the window
666 * associated with this processor
667 */
668 copy_window_base = current_cpu_datap()->cpu_copywindow_base;
669
670 if (kvaddr >= copy_window_base && kvaddr < (copy_window_base + (NBPDE * NCOPY_WINDOWS)) ) {
671
672 window_index = (kvaddr - copy_window_base) / NBPDE;
673
674 if (thread->machine.copy_window[window_index].user_base != (user_addr_t)-1) {
675
676 kvaddr -= (copy_window_base + (NBPDE * window_index));
677 vaddr = thread->machine.copy_window[window_index].user_base + kvaddr;
678
679 map = thread->map;
680 fault_in_copy_window = window_index;
681 }
682 is_user = -1;
683 }
684 #else
685 if (vaddr < VM_MAX_USER_PAGE_ADDRESS) {
686 /* fault occurred in userspace */
687 map = thread->map;
688 is_user = -1;
689 /*
690 * If we're not sharing cr3 with the user
691 * and we faulted in copyio,
692 * then switch cr3 here and dismiss the fault.
693 */
694 if (no_shared_cr3 &&
695 (thread->machine.specFlags&CopyIOActive) &&
696 map->pmap->pm_cr3 != get_cr3()) {
697 set_cr3(map->pmap->pm_cr3);
698 return;
699 }
700 }
701 #endif
702 }
703 }
704 KERNEL_DEBUG_CONSTANT(
705 (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
706 (unsigned)(vaddr >> 32), (unsigned)vaddr, is_user, kern_ip, 0);
707
708
709 (void) ml_set_interrupts_enabled(intr);
710
711 switch (type) {
712
713 case T_NO_FPU:
714 fpnoextflt();
715 return;
716
717 case T_FPU_FAULT:
718 fpextovrflt();
719 return;
720
721 case T_FLOATING_POINT_ERROR:
722 fpexterrflt();
723 return;
724
725 case T_SSE_FLOAT_ERROR:
726 fpSSEexterrflt();
727 return;
728 case T_DEBUG:
729 #ifdef __i386__
730 if ((saved_state->efl & EFL_TF) == 0 && NO_WATCHPOINTS)
731 #else
732 if ((saved_state->isf.rflags & EFL_TF) == 0 && NO_WATCHPOINTS)
733 #endif
734 {
735 /* We've somehow encountered a debug
736 * register match that does not belong
737 * to the kernel debugger.
738 * This isn't supposed to happen.
739 */
740 reset_dr7();
741 return;
742 }
743 goto debugger_entry;
744 #ifdef __x86_64__
745 case T_INT3:
746 goto debugger_entry;
747 #endif
748 case T_PAGE_FAULT:
749 /*
750 * If the current map is a submap of the kernel map,
751 * and the address is within that map, fault on that
752 * map. If the same check is done in vm_fault
753 * (vm_map_lookup), we may deadlock on the kernel map
754 * lock.
755 */
756
757 prot = VM_PROT_READ;
758
759 if (code & T_PF_WRITE)
760 prot |= VM_PROT_WRITE;
761 #if PAE
762 if (code & T_PF_EXECUTE)
763 prot |= VM_PROT_EXECUTE;
764 #endif
765
766 #if MACH_KDB
767 /*
768 * Check for watchpoint on kernel static data.
769 * vm_fault would fail in this case
770 */
771 if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted &&
772 (code & T_PF_WRITE) && vaddr < vm_map_max(map) &&
773 ((*(pte = pmap_pte(kernel_pmap, (vm_map_offset_t)vaddr))) & INTEL_PTE_WRITE) == 0) {
774 pmap_store_pte(
775 pte,
776 *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE);
777 /* XXX need invltlb here? */
778
779 result = KERN_SUCCESS;
780 goto look_for_watchpoints;
781 }
782 #endif /* MACH_KDB */
783
784 #if CONFIG_DTRACE
785 if (thread->options & TH_OPT_DTRACE) { /* Executing under dtrace_probe? */
786 if (dtrace_tally_fault(vaddr)) { /* Should a fault under dtrace be ignored? */
787 /*
788 * DTrace has "anticipated" the possibility of this fault, and has
789 * established the suitable recovery state. Drop down now into the
790 * recovery handling code in "case T_GENERAL_PROTECTION:".
791 */
792 goto FALL_THROUGH;
793 }
794 }
795 #endif /* CONFIG_DTRACE */
796
797 result = vm_fault(map,
798 vm_map_trunc_page(vaddr),
799 prot,
800 FALSE,
801 THREAD_UNINT, NULL, 0);
802
803 #if MACH_KDB
804 if (result == KERN_SUCCESS) {
805 /*
806 * Look for watchpoints
807 */
808 look_for_watchpoints:
809 if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted && (code & T_PF_WRITE) &&
810 db_find_watchpoint(map, vaddr, saved_state))
811 kdb_trap(T_WATCHPOINT, 0, saved_state);
812 }
813 #endif /* MACH_KDB */
814
815 if (result == KERN_SUCCESS) {
816 #if NCOPY_WINDOWS > 0
817 if (fault_in_copy_window != -1) {
818 pt_entry_t *updp;
819 pt_entry_t *kpdp;
820
821 /*
822 * in case there was no page table assigned
823 * for the user base address and the pmap
824 * got 'expanded' due to this fault, we'll
825 * copy in the descriptor
826 *
827 * we're either setting the page table descriptor
828 * to the same value or it was 0... no need
829 * for a TLB flush in either case
830 */
831
832 ml_set_interrupts_enabled(FALSE);
833 updp = pmap_pde(map->pmap, thread->machine.copy_window[fault_in_copy_window].user_base);
834 assert(updp);
835 if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */
836 kpdp = current_cpu_datap()->cpu_copywindow_pdp;
837 kpdp += fault_in_copy_window;
838
839 #if JOE_DEBUG
840 if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME))
841 panic("kernel_fault: user pdp doesn't match - updp = 0x%qx, kpdp = 0x%qx\n", *updp, *kpdp);
842 #endif
843 pmap_store_pte(kpdp, *updp);
844
845 (void) ml_set_interrupts_enabled(intr);
846 }
847 #endif /* NCOPY_WINDOWS > 0 */
848 return;
849 }
850 /*
851 * fall through
852 */
853 #if CONFIG_DTRACE
854 FALL_THROUGH:
855 #endif /* CONFIG_DTRACE */
856
857 case T_GENERAL_PROTECTION:
858 #if defined(__x86_64__) && DEBUG
859 print_state(saved_state);
860 #endif
861 /*
862 * If there is a failure recovery address
863 * for this fault, go there.
864 */
865 for (rp = recover_table; rp < recover_table_end; rp++) {
866 if (kern_ip == rp->fault_addr) {
867 set_recovery_ip(saved_state, rp->recover_addr);
868 return;
869 }
870 }
871
872 /*
873 * Check thread recovery address also.
874 */
875 if (thread->recover) {
876 set_recovery_ip(saved_state, thread->recover);
877 thread->recover = 0;
878 return;
879 }
880 /*
881 * Unanticipated page-fault errors in kernel
882 * should not happen.
883 *
884 * fall through...
885 */
886
887 default:
888 /*
889 * Exception 15 is reserved but some chips may generate it
890 * spuriously. Seen at startup on AMD Athlon-64.
891 */
892 if (type == 15) {
893 kprintf("kernel_trap() ignoring spurious trap 15\n");
894 return;
895 }
896 debugger_entry:
897 /* Ensure that the i386_kernel_state at the base of the
898 * current thread's stack (if any) is synchronized with the
899 * context at the moment of the trap, to facilitate
900 * access through the debugger.
901 */
902 sync_iss_to_iks(state);
903 #if MACH_KDB
904 restart_debugger:
905 #endif /* MACH_KDB */
906 #if MACH_KDP
907 if (current_debugger != KDB_CUR_DB) {
908 if (kdp_i386_trap(type, saved_state, result, (vm_offset_t)vaddr))
909 return;
910 } else {
911 #endif /* MACH_KDP */
912 #if MACH_KDB
913 if (kdb_trap(type, code, saved_state)) {
914 if (switch_debugger) {
915 current_debugger = KDP_CUR_DB;
916 switch_debugger = 0;
917 goto restart_debugger;
918 }
919 return;
920 }
921 #endif /* MACH_KDB */
922 #if MACH_KDP
923 }
924 #endif
925 }
926
927 panic_trap(saved_state);
928 /*
929 * NO RETURN
930 */
931 }
932
933
934 #ifdef __i386__
935 static void
936 set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip)
937 {
938 saved_state->eip = ip;
939 }
940 #else
941 static void
942 set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip)
943 {
944 saved_state->isf.rip = ip;
945 }
946 #endif
947
948
949 #ifdef __i386__
950 static void
951 panic_trap(x86_saved_state32_t *regs)
952 {
953 const char *trapname = "Unknown";
954 uint32_t cr0 = get_cr0();
955 uint32_t cr2 = get_cr2();
956 uint32_t cr3 = get_cr3();
957 uint32_t cr4 = get_cr4();
958 /*
959 * Issue an I/O port read if one has been requested - this is an
960 * event logic analyzers can use as a trigger point.
961 */
962 panic_io_port_read();
963
964 kprintf("panic trap number 0x%x, eip 0x%x\n", regs->trapno, regs->eip);
965 kprintf("cr0 0x%08x cr2 0x%08x cr3 0x%08x cr4 0x%08x\n",
966 cr0, cr2, cr3, cr4);
967
968 if (regs->trapno < TRAP_TYPES)
969 trapname = trap_type[regs->trapno];
970 #undef panic
971 panic("Kernel trap at 0x%08x, type %d=%s, registers:\n"
972 "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
973 "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
974 "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
975 "EFL: 0x%08x, EIP: 0x%08x, CS: 0x%08x, DS: 0x%08x\n"
976 "Error code: 0x%08x\n",
977 regs->eip, regs->trapno, trapname, cr0, cr2, cr3, cr4,
978 regs->eax,regs->ebx,regs->ecx,regs->edx,
979 regs->cr2,regs->ebp,regs->esi,regs->edi,
980 regs->efl,regs->eip,regs->cs, regs->ds, regs->err);
981 /*
982 * This next statement is not executed,
983 * but it's needed to stop the compiler using tail call optimization
984 * for the panic call - which confuses the subsequent backtrace.
985 */
986 cr0 = 0;
987 }
988 #else
989 static void
990 panic_trap(x86_saved_state64_t *regs)
991 {
992 const char *trapname = "Unknown";
993 uint64_t cr0 = get_cr0();
994 uint64_t cr2 = get_cr2();
995 uint64_t cr3 = get_cr3();
996 uint64_t cr4 = get_cr4();
997
998 /*
999 * Issue an I/O port read if one has been requested - this is an
1000 * event logic analyzers can use as a trigger point.
1001 */
1002 panic_io_port_read();
1003
1004 kprintf("panic trap number 0x%x, rip 0x%016llx\n",
1005 regs->isf.trapno, regs->isf.rip);
1006 kprintf("cr0 0x%016llx cr2 0x%016llx cr3 0x%016llx cr4 0x%016llx\n",
1007 cr0, cr2, cr3, cr4);
1008
1009 if (regs->isf.trapno < TRAP_TYPES)
1010 trapname = trap_type[regs->isf.trapno];
1011 #undef panic
1012 panic("Kernel trap at 0x%016llx, type %d=%s, registers:\n"
1013 "CR0: 0x%016llx, CR2: 0x%016llx, CR3: 0x%016llx, CR4: 0x%016llx\n"
1014 "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n"
1015 "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n"
1016 "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
1017 "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
1018 "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n"
1019 "Error code: 0x%016llx\n",
1020 regs->isf.rip, regs->isf.trapno, trapname,
1021 cr0, cr2, cr3, cr4,
1022 regs->rax, regs->rbx, regs->rcx, regs->rdx,
1023 regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi,
1024 regs->r8, regs->r9, regs->r10, regs->r11,
1025 regs->r12, regs->r13, regs->r14, regs->r15,
1026 regs->isf.rflags, regs->isf.rip, regs->isf.cs, regs->isf.ss,
1027 regs->isf.err);
1028 /*
1029 * This next statement is not executed,
1030 * but it's needed to stop the compiler using tail call optimization
1031 * for the panic call - which confuses the subsequent backtrace.
1032 */
1033 cr0 = 0;
1034 }
1035 #endif
1036
1037 extern void kprintf_break_lock(void);
1038
1039 #ifdef __i386__
1040 static void
1041 panic_32(__unused int code, __unused int pc, __unused const char *msg, boolean_t do_mca_dump, boolean_t do_bt)
1042 {
1043 struct i386_tss *my_ktss = current_ktss();
1044
1045 /* Set postcode (DEBUG only) */
1046 postcode(pc);
1047
1048 /*
1049 * Issue an I/O port read if one has been requested - this is an
1050 * event logic analyzers can use as a trigger point.
1051 */
1052 panic_io_port_read();
1053
1054 /*
1055 * Break kprintf lock in case of recursion,
1056 * and record originally faulted instruction address.
1057 */
1058 kprintf_break_lock();
1059
1060 if (do_mca_dump) {
1061 #if CONFIG_MCA
1062 /*
1063 * Dump the contents of the machine check MSRs (if any).
1064 */
1065 mca_dump();
1066 #endif
1067 }
1068
1069 #if MACH_KDP
1070 /*
1071 * Print backtrace leading to first fault:
1072 */
1073 if (do_bt)
1074 panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL);
1075 #endif
1076
1077 panic("%s at 0x%08x, thread:%p, code:0x%x, "
1078 "registers:\n"
1079 "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
1080 "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
1081 "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
1082 "EFL: 0x%08x, EIP: 0x%08x\n",
1083 msg,
1084 my_ktss->eip, current_thread(), code,
1085 (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
1086 my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx,
1087 my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi,
1088 my_ktss->eflags, my_ktss->eip);
1089 }
1090
1091 /*
1092 * Called from locore on a special reserved stack after a double-fault
1093 * is taken in kernel space.
1094 * Kernel stack overflow is one route here.
1095 */
1096 void
1097 panic_double_fault32(int code)
1098 {
1099 panic_32(code, PANIC_DOUBLE_FAULT, "Double fault", FALSE, TRUE);
1100 }
1101
1102 /*
1103 * Called from locore on a special reserved stack after a machine-check
1104 */
1105 void
1106 panic_machine_check32(int code)
1107 {
1108 panic_32(code, PANIC_MACHINE_CHECK, "Machine-check", TRUE, FALSE);
1109 }
1110 #endif /* __i386__ */
1111
1112 static void
1113 panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boolean_t do_mca_dump)
1114 {
1115 /* Set postcode (DEBUG only) */
1116 postcode(pc);
1117
1118 /*
1119 * Issue an I/O port read if one has been requested - this is an
1120 * event logic analyzers can use as a trigger point.
1121 */
1122 panic_io_port_read();
1123
1124 /*
1125 * Break kprintf lock in case of recursion,
1126 * and record originally faulted instruction address.
1127 */
1128 kprintf_break_lock();
1129
1130 if (do_mca_dump) {
1131 #if CONFIG_MCA
1132 /*
1133 * Dump the contents of the machine check MSRs (if any).
1134 */
1135 mca_dump();
1136 #endif
1137 }
1138
1139 #ifdef __i386__
1140 /*
1141 * Dump the interrupt stack frame at last kernel entry.
1142 */
1143 if (is_saved_state64(sp)) {
1144 x86_saved_state64_t *ss64p = saved_state64(sp);
1145 panic("%s thread:%p, trapno:0x%x, err:0x%qx, "
1146 "registers:\n"
1147 "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
1148 "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n"
1149 "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n"
1150 "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n"
1151 "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n"
1152 "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx\n",
1153 msg,
1154 current_thread(), ss64p->isf.trapno, ss64p->isf.err,
1155 (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
1156 ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx,
1157 ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi,
1158 ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11,
1159 ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15,
1160 ss64p->isf.rflags, ss64p->isf.rip, ss64p->cr2);
1161 } else {
1162 x86_saved_state32_t *ss32p = saved_state32(sp);
1163 panic("%s at 0x%08x, thread:%p, trapno:0x%x, err:0x%x,"
1164 "registers:\n"
1165 "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
1166 "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
1167 "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
1168 "EFL: 0x%08x, EIP: 0x%08x\n",
1169 msg,
1170 ss32p->eip, current_thread(), ss32p->trapno, ss32p->err,
1171 (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
1172 ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx,
1173 ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi,
1174 ss32p->efl, ss32p->eip);
1175 }
1176 #else
1177 x86_saved_state64_t *regs = saved_state64(sp);
1178 panic("%s thread:%p at 0x%016llx, registers:\n"
1179 "CR0: 0x%016lx, CR2: 0x%016lx, CR3: 0x%016lx, CR4: 0x%016lx\n"
1180 "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n"
1181 "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n"
1182 "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
1183 "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
1184 "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n"
1185 "Error code: 0x%016llx\n",
1186 msg,
1187 current_thread(), regs->isf.rip,
1188 get_cr0(), get_cr2(), get_cr3(), get_cr4(),
1189 regs->rax, regs->rbx, regs->rcx, regs->rdx,
1190 regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi,
1191 regs->r8, regs->r9, regs->r10, regs->r11,
1192 regs->r12, regs->r13, regs->r14, regs->r15,
1193 regs->isf.rflags, regs->isf.rip, regs->isf.cs, regs->isf.ss,
1194 regs->isf.err);
1195 #endif
1196 }
1197
1198 void
1199 panic_double_fault64(x86_saved_state_t *sp)
1200 {
1201 panic_64(sp, PANIC_DOUBLE_FAULT, "Double fault", FALSE);
1202 }
1203 
1204 void
1205 panic_machine_check64(x86_saved_state_t *sp)
1206 {
1207 panic_64(sp, PANIC_MACHINE_CHECK, "Machine Check", TRUE);
1208 }
1211
1212 #if CONFIG_DTRACE
1213 extern kern_return_t dtrace_user_probe(x86_saved_state_t *);
1214 #endif
1215
1216 /*
1217 * Trap from user mode.
1218 */
1219 void
1220 user_trap(
1221 x86_saved_state_t *saved_state)
1222 {
1223 int exc;
1224 int err;
1225 mach_exception_code_t code;
1226 mach_exception_subcode_t subcode;
1227 int type;
1228 user_addr_t vaddr;
1229 vm_prot_t prot;
1230 thread_t thread = current_thread();
1231 ast_t *myast;
1232 kern_return_t kret;
1233 user_addr_t rip;
1234
1235 assert((is_saved_state32(saved_state) && !thread_is_64bit(thread)) ||
1236 (is_saved_state64(saved_state) && thread_is_64bit(thread)));
1237
1238 if (is_saved_state64(saved_state)) {
1239 x86_saved_state64_t *regs;
1240
1241 regs = saved_state64(saved_state);
1242
1243 type = regs->isf.trapno;
1244 err = (int)regs->isf.err & 0xffff;
1245 vaddr = (user_addr_t)regs->cr2;
1246 rip = (user_addr_t)regs->isf.rip;
1247 } else {
1248 x86_saved_state32_t *regs;
1249
1250 regs = saved_state32(saved_state);
1251
1252 type = regs->trapno;
1253 err = regs->err & 0xffff;
1254 vaddr = (user_addr_t)regs->cr2;
1255 rip = (user_addr_t)regs->eip;
1256 }
1257
1258 KERNEL_DEBUG_CONSTANT(
1259 (MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE,
1260 (unsigned)(vaddr>>32), (unsigned)vaddr,
1261 (unsigned)(rip>>32), (unsigned)rip, 0);
1262
1263 code = 0;
1264 subcode = 0;
1265 exc = 0;
1266
1267 #if DEBUG_TRACE
1268 kprintf("user_trap(0x%08x) type=%d vaddr=0x%016llx\n",
1269 saved_state, type, vaddr);
1270 #endif
1271 perfCallback fn = perfASTHook;
1272 if (fn) {
1273 myast = ast_pending();
1274 if (*myast & AST_CHUD_ALL) {
1275 fn(type, saved_state, 0, 0);
1276 }
1277 }
1278
1279 /* Is there a hook? */
1280 fn = perfTrapHook;
1281 if (fn) {
1282 if (fn(type, saved_state, 0, 0) == KERN_SUCCESS)
1283 return; /* If it succeeds, we are done... */
1284 }
1285
1286 /*
1287 * DTrace does not consume all user traps, only INT_3's for now.
1288 * Avoid needlessly calling tempDTraceTrapHook here, and let the
1289 * INT_3 case handle them.
1290 */
1291 DEBUG_KPRINT_SYSCALL_MASK(1,
1292 "user_trap: type=0x%x(%s) err=0x%x cr2=%p rip=%p\n",
1293 type, (type < (int) TRAP_TYPES) ? trap_type[type] : "Unknown", err, (void *)(long) vaddr, (void *)(long) rip);
1294
1295 switch (type) {
1296
1297 case T_DIVIDE_ERROR:
1298 exc = EXC_ARITHMETIC;
1299 code = EXC_I386_DIV;
1300 break;
1301
1302 case T_DEBUG:
1303 {
1304 pcb_t pcb;
1305 long clear = 0; /* 32 bit for i386, 64 bit for x86_64 */
1306 /*
1307 * get dr6 and set it in the thread's pcb before
1308 * returning to userland
1309 */
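/*
 * The processor never clears DR6 itself, so after copying the status
 * into the pcb we clear it explicitly to avoid reporting stale bits
 * on the next debug exception.
 */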
1310 pcb = thread->machine.pcb;
1311 if (pcb->ids) {
1312 /*
1313 * We can get and set the status register
1314 * in 32-bit mode even on a 64-bit thread
1315 * because the high order bits are not
1316 * used on x86_64
1317 */
1318 unsigned long dr6_temp; /* 32 bit for i386, 64 bit for x86_64 */
1319 __asm__ volatile ("mov %%db6, %0" : "=r" (dr6_temp)); /* Register constraint by necessity */
1320 if (thread_is_64bit(thread)) {
1321 x86_debug_state64_t *ids = pcb->ids;
1322 ids->dr6 = dr6_temp;
1323 } else { /* 32 bit thread */
1324 x86_debug_state32_t *ids = pcb->ids;
1325 ids->dr6 = (uint32_t) dr6_temp;
1326 }
1327 __asm__ volatile ("mov %0, %%db6" : : "r" (clear));
1328 }
1329 exc = EXC_BREAKPOINT;
1330 code = EXC_I386_SGL;
1331 break;
1332 }
1333 case T_INT3:
1334 #if CONFIG_DTRACE
1335 if (dtrace_user_probe(saved_state) == KERN_SUCCESS)
1336 return; /* If it succeeds, we are done... */
1337 #endif
1338 exc = EXC_BREAKPOINT;
1339 code = EXC_I386_BPT;
1340 break;
1341
1342 case T_OVERFLOW:
1343 exc = EXC_ARITHMETIC;
1344 code = EXC_I386_INTO;
1345 break;
1346
1347 case T_OUT_OF_BOUNDS:
1348 exc = EXC_SOFTWARE;
1349 code = EXC_I386_BOUND;
1350 break;
1351
1352 case T_INVALID_OPCODE:
1353 exc = EXC_BAD_INSTRUCTION;
1354 code = EXC_I386_INVOP;
1355 break;
1356
1357 case T_NO_FPU:
1358 fpnoextflt();
1359 return;
1360
1361 case T_FPU_FAULT:
1362 fpextovrflt(); /* Propagates exception directly, doesn't return */
1363 return;
1364
1365 case T_INVALID_TSS: /* invalid TSS == iret with NT flag set */
1366 exc = EXC_BAD_INSTRUCTION;
1367 code = EXC_I386_INVTSSFLT;
1368 subcode = err;
1369 break;
1370
1371 case T_SEGMENT_NOT_PRESENT:
1372 exc = EXC_BAD_INSTRUCTION;
1373 code = EXC_I386_SEGNPFLT;
1374 subcode = err;
1375 break;
1376
1377 case T_STACK_FAULT:
1378 exc = EXC_BAD_INSTRUCTION;
1379 code = EXC_I386_STKFLT;
1380 subcode = err;
1381 break;
1382
1383 case T_GENERAL_PROTECTION:
1384 /*
1385 * There's a wide range of circumstances which generate this
1386 * class of exception. From user-space, many involve bad
1387 * addresses (such as a non-canonical 64-bit address).
1388 * So we map this to EXC_BAD_ACCESS (and thereby SIGSEGV).
1389 * The trouble is cr2 doesn't contain the faulting address;
1390 * we'd need to decode the faulting instruction to really
1391 * determine this. We'll leave that to debuggers.
1392 * However, attempted execution of privileged instructions
1393 * (e.g. cli) also generate GP faults and so we map these to
1394 * to EXC_BAD_ACCESS (and thence SIGSEGV) also - rather than
1395 * EXC_BAD_INSTRUCTION which is more accurate. We just can't
1396 * win!
1397 */
1398 exc = EXC_BAD_ACCESS;
1399 code = EXC_I386_GPFLT;
1400 subcode = err;
1401 break;
1402
1403 case T_PAGE_FAULT:
1404 prot = VM_PROT_READ;
1405
1406 if (err & T_PF_WRITE)
1407 prot |= VM_PROT_WRITE;
1408 #if PAE
1409 if (err & T_PF_EXECUTE)
1410 prot |= VM_PROT_EXECUTE;
1411 #endif
1412 kret = vm_fault(thread->map, vm_map_trunc_page(vaddr),
1413 prot, FALSE,
1414 THREAD_ABORTSAFE, NULL, 0);
1415
1416 user_page_fault_continue(kret);
1417
1418 /* NOTREACHED */
1419 break;
1420
1421 case T_SSE_FLOAT_ERROR:
1422 fpSSEexterrflt(); /* Propagates exception directly, doesn't return */
1423 return;
1424
1425
1426 case T_FLOATING_POINT_ERROR:
1427 fpexterrflt(); /* Propagates exception directly, doesn't return */
1428 return;
1429
1430 case T_DTRACE_RET:
1431 #if CONFIG_DTRACE
1432 if (dtrace_user_probe(saved_state) == KERN_SUCCESS)
1433 return; /* If it succeeds, we are done... */
1434 #endif
1435 /*
1436 * If we get an INT 0x7f when we do not expect to,
1437 * treat it as an illegal instruction
1438 */
1439 exc = EXC_BAD_INSTRUCTION;
1440 code = EXC_I386_INVOP;
1441 break;
1442
1443 default:
1444 #if MACH_KGDB
1445 Debugger("Unanticipated user trap");
1446 return;
1447 #endif /* MACH_KGDB */
1448 #if MACH_KDB
1449 if (kdb_trap(type, err, saved_state32(saved_state)))
1450 return;
1451 #endif /* MACH_KDB */
1452 panic("Unexpected user trap, type %d", type);
1453 return;
1454 }
1455 /* Note: Codepaths that directly return from user_trap() have pending
1456 * ASTs processed in locore
1457 */
1458 i386_exception(exc, code, subcode);
1459 /* NOTREACHED */
1460 }
1461
1462
1463 /*
1464 * Handle AST traps for i386.
1465 * Check for delayed floating-point exception from
1466 * AT-bus machines.
1467 */
1468
1469 extern void log_thread_action (thread_t, char *);
1470
1471 void
1472 i386_astintr(int preemption)
1473 {
1474 ast_t mask = AST_ALL;
1475 spl_t s;
1476
1477 if (preemption)
1478 mask = AST_PREEMPTION;
1479
1480 s = splsched();
1481
1482 ast_taken(mask, s);
1483
1484 splx(s);
1485 }
1486
1487 /*
1488 * Handle exceptions for i386.
1489 *
1490 * If we are an AT bus machine, we must turn off the AST for a
1491 * delayed floating-point exception.
1492 *
1493 * If we are providing floating-point emulation, we may have
1494 * to retrieve the real register values from the floating point
1495 * emulator.
1496 */
1497 void
1498 i386_exception(
1499 int exc,
1500 mach_exception_code_t code,
1501 mach_exception_subcode_t subcode)
1502 {
1503 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX];
1504
1505 DEBUG_KPRINT_SYSCALL_MACH("i386_exception: exc=%d code=0x%llx subcode=0x%llx\n",
1506 exc, code, subcode);
1507 codes[0] = code; /* new exception interface */
1508 codes[1] = subcode;
1509 exception_triage(exc, codes, 2);
1510 /*NOTREACHED*/
1511 }
1512
1513
1514
1515 void
1516 kernel_preempt_check(void)
1517 {
1518 ast_t *myast;
1519 boolean_t intr;
1520
1521 /*
1522 * disable interrupts to both prevent pre-emption
1523 * and to keep the ast state from changing via
1524 * an interrupt handler making something runnable
1525 */
1526 intr = ml_set_interrupts_enabled(FALSE);
1527
1528 myast = ast_pending();
1529
1530 if ((*myast & AST_URGENT) && intr == TRUE && get_interrupt_level() == 0) {
1531 /*
1532 * can handle interrupts and preemptions
1533 * at this point
1534 */
1535 ml_set_interrupts_enabled(intr);
1536
1537 /*
1538 * now cause the PRE-EMPTION trap
1539 */
1540 __asm__ volatile (" int $0xff");
1541 } else {
1542 /*
1543 * if interrupts were already disabled or
1544 * we're in an interrupt context, we can't
1545 * preempt... of course if AST_URGENT
1546 * isn't set we also don't want to
1547 */
1548 ml_set_interrupts_enabled(intr);
1549 }
1550 }
1551
1552 #if MACH_KDB
1553
1554 extern void db_i386_state(x86_saved_state32_t *regs);
1555
1556 #include <ddb/db_output.h>
1557
1558 void
1559 db_i386_state(
1560 x86_saved_state32_t *regs)
1561 {
1562 db_printf("eip %8x\n", regs->eip);
1563 db_printf("trap %8x\n", regs->trapno);
1564 db_printf("err %8x\n", regs->err);
1565 db_printf("efl %8x\n", regs->efl);
1566 db_printf("ebp %8x\n", regs->ebp);
1567 db_printf("esp %8x\n", regs->cr2);
1568 db_printf("uesp %8x\n", regs->uesp);
1569 db_printf("cs %8x\n", regs->cs & 0xff);
1570 db_printf("ds %8x\n", regs->ds & 0xff);
1571 db_printf("es %8x\n", regs->es & 0xff);
1572 db_printf("fs %8x\n", regs->fs & 0xff);
1573 db_printf("gs %8x\n", regs->gs & 0xff);
1574 db_printf("ss %8x\n", regs->ss & 0xff);
1575 db_printf("eax %8x\n", regs->eax);
1576 db_printf("ebx %8x\n", regs->ebx);
1577 db_printf("ecx %8x\n", regs->ecx);
1578 db_printf("edx %8x\n", regs->edx);
1579 db_printf("esi %8x\n", regs->esi);
1580 db_printf("edi %8x\n", regs->edi);
1581 }
1582
1583 #endif /* MACH_KDB */
1584
1585 /* Synchronize a thread's i386_kernel_state (if any) with the given
1586 * i386_saved_state_t obtained from the trap/IPI handler; called in
1587 * kernel_trap() prior to entering the debugger, and when receiving
1588 * an "MP_KDP" IPI.
1589 */
1590
1591 void
1592 sync_iss_to_iks(x86_saved_state_t *saved_state)
1593 {
1594 struct x86_kernel_state *iks;
1595 vm_offset_t kstack;
1596 boolean_t record_active_regs = FALSE;
1597
1598 if ((kstack = current_thread()->kernel_stack) != 0) {
1599 #ifdef __i386__
1600 x86_saved_state32_t *regs = saved_state32(saved_state);
1601 #else
1602 x86_saved_state64_t *regs = saved_state64(saved_state);
1603 #endif
1604
1605 iks = STACK_IKS(kstack);
1606
1607
1608 /* Did we take the trap/interrupt in kernel mode? */
1609 #ifdef __i386__
1610 if (regs == USER_REGS32(current_thread()))
1611 record_active_regs = TRUE;
1612 else {
1613 iks->k_ebx = regs->ebx;
1614 iks->k_esp = (int)regs;
1615 iks->k_ebp = regs->ebp;
1616 iks->k_edi = regs->edi;
1617 iks->k_esi = regs->esi;
1618 iks->k_eip = regs->eip;
1619 }
1620 #else
1621 if (regs == USER_REGS64(current_thread()))
1622 record_active_regs = TRUE;
1623 else {
1624 iks->k_rbx = regs->rbx;
1625 iks->k_rsp = regs->isf.rsp;
1626 iks->k_rbp = regs->rbp;
1627 iks->k_r12 = regs->r12;
1628 iks->k_r13 = regs->r13;
1629 iks->k_r14 = regs->r14;
1630 iks->k_r15 = regs->r15;
1631 iks->k_rip = regs->isf.rip;
1632 }
1633 #endif
1634 }
1635
1636 if (record_active_regs == TRUE) {
1637 #ifdef __i386__
1638 /* Show the trap handler path */
1639 __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx));
1640 __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp));
1641 __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp));
1642 __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi));
1643 __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi));
1644 /* "Current" instruction pointer */
1645 __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip));
1646 #else
1647 /* Show the trap handler path */
1648 __asm__ volatile("movq %%rbx, %0" : "=m" (iks->k_rbx));
1649 __asm__ volatile("movq %%rsp, %0" : "=m" (iks->k_rsp));
1650 __asm__ volatile("movq %%rbp, %0" : "=m" (iks->k_rbp));
1651 __asm__ volatile("movq %%r12, %0" : "=m" (iks->k_r12));
1652 __asm__ volatile("movq %%r13, %0" : "=m" (iks->k_r13));
1653 __asm__ volatile("movq %%r14, %0" : "=m" (iks->k_r14));
1654 __asm__ volatile("movq %%r15, %0" : "=m" (iks->k_r15));
1655 /* "Current" instruction pointer */
1656 __asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
1657 : "=m" (iks->k_rip)
1658 :
1659 : "rax");
1660 #endif
1661 }
1662 }
1663
1664 /*
1665 * This is used by the NMI interrupt handler (from mp.c) to
1666 * unconditionally sync the trap handler context to the IKS
1667 * irrespective of whether the NMI was fielded in kernel
1668 * or user space.
1669 */
1670 void
1671 sync_iss_to_iks_unconditionally(__unused x86_saved_state_t *saved_state) {
1672 struct x86_kernel_state *iks;
1673 vm_offset_t kstack;
1674
1675 if ((kstack = current_thread()->kernel_stack) != 0) {
1676 iks = STACK_IKS(kstack);
1677 #ifdef __i386__
1678 /* Display the trap handler path */
1679 __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx));
1680 __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp));
1681 __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp));
1682 __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi));
1683 __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi));
1684 /* "Current" instruction pointer */
1685 __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip));
1686 #else
1687 /* Display the trap handler path */
1688 __asm__ volatile("movq %%rbx, %0" : "=m" (iks->k_rbx));
1689 __asm__ volatile("movq %%rsp, %0" : "=m" (iks->k_rsp));
1690 __asm__ volatile("movq %%rbp, %0" : "=m" (iks->k_rbp));
1691 __asm__ volatile("movq %%r12, %0" : "=m" (iks->k_r12));
1692 __asm__ volatile("movq %%r13, %0" : "=m" (iks->k_r13));
1693 __asm__ volatile("movq %%r14, %0" : "=m" (iks->k_r14));
1694 __asm__ volatile("movq %%r15, %0" : "=m" (iks->k_r15));
1695 /* "Current" instruction pointer */
1696 __asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" : "=m" (iks->k_rip)::"rax");
1697 #endif
1698 }
1699 }