/*
 * Copyright (c) 2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_	/* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>
/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 *		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *		interrupt	- asynchronous events typically from external devices
 *		trap		- synchronous events due to thread execution
 *		syscall		- synchronous system call request
 */
#define	HNDL_ALLINTRS		EXT(hndl_allintrs)
#define	HNDL_ALLTRAPS		EXT(hndl_alltraps)
#define	HNDL_SYSENTER		EXT(hndl_sysenter)
#define	HNDL_SYSCALL		EXT(hndl_syscall)
#define	HNDL_UNIX_SCALL		EXT(hndl_unix_scall)
#define	HNDL_MACH_SCALL		EXT(hndl_mach_scall)
#define	HNDL_MDEP_SCALL		EXT(hndl_mdep_scall)
#define	HNDL_DOUBLE_FAULT	EXT(hndl_double_fault)
#define	HNDL_MACHINE_CHECK	EXT(hndl_machine_check)
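
/*
 * These symbols name the class handlers (hndl_*) defined later in this
 * file; the stub macros below push one of them so that the common dispatch
 * code knows where to hand off control.
 */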
#if	1
#define	PUSH_FUNCTION(func)			 \
	sub	$8, %rsp			;\
	push	%rax				;\
	leaq	func(%rip), %rax		;\
	mov	%rax, 8(%rsp)			;\
	pop	%rax
#else
#define	PUSH_FUNCTION(func)	pushq func
#endif
/* The wrapper for all non-special traps/interrupts */
/* Everything up to PUSH_FUNCTION is just to output
 * the interrupt number out to the postcode display
 */
#if	DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f)			 \
	POSTCODE2(0x6400+n)			;\
	PUSH_FUNCTION(f)			;\
	pushq	$(n)				;\
	jmp	L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f)			 \
	PUSH_FUNCTION(f)			;\
	pushq	$(n)				;\
	jmp	L_dispatch
#endif
/* A trap that comes with an error code already on the stack */
#define	TRAP_ERR(n, f)				 \
	Entry(f)				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A trap with no error code */
#define	TRAP(n, f)				 \
	Entry(f)				;\
	pushq	$0				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define	USER_TRAP TRAP

/* An interrupt */
#define	INTERRUPT(n)				 \
	Entry(_intr_ ## n)			;\
	pushq	$0				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)

/* A trap with a special-case handler, hence we don't need to define anything */
#define	TRAP_SPC(n, f)
#define	TRAP_IST1(n, f)
#define	TRAP_IST2(n, f)
#define	USER_TRAP_SPC(n, f)

/* Generate all the stubs */
#include "idt_table.h"
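
/*
 * For illustration only (a sketch, not assembled): a hypothetical
 * USER_TRAP(0x03, idt64_int3) entry in idt_table.h would expand to
 * approximately:
 *
 *	Entry(idt64_int3)
 *	pushq	$0			// no hardware error code for this vector
 *	PUSH_FUNCTION(HNDL_ALLTRAPS)	// push address of the class handler
 *	pushq	$(0x03)			// push the vector number
 *	jmp	L_dispatch		// save state and dispatch
 */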
/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */
L_dispatch:
	cmpl	$(KERNEL64_CS), ISF64_CS(%rsp)
	je	L_dispatch_kernel	/* trap/interrupt from kernel mode */

	swapgs				/* switch to kernel gs (cpu_data) */
L_dispatch_user:
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je	L_dispatch_U32		/* 32-bit user task */

L_dispatch_U64:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15		/* %r15 := saved state */
	mov	%gs:CPU_KERNEL_STACK, %rsp
	jmp	L_dispatch_64bit

L_dispatch_kernel:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15		/* %r15 := saved state */

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl	$(SS_64), SS_FLAVOR(%r15)
	/*
	 * Save segment regs - for completeness since they're not used.
	 */
	movl	%fs, R64_FS(%r15)
	movl	%gs, R64_GS(%r15)

	/* Save general-purpose registers */
	mov	%rax, R64_RAX(%r15)
	mov	%rbx, R64_RBX(%r15)
	mov	%rcx, R64_RCX(%r15)
	mov	%rdx, R64_RDX(%r15)
	mov	%rbp, R64_RBP(%r15)
	mov	%rdi, R64_RDI(%r15)
	mov	%rsi, R64_RSI(%r15)
	mov	%r8,  R64_R8(%r15)
	mov	%r9,  R64_R9(%r15)
	mov	%r10, R64_R10(%r15)
	mov	%r11, R64_R11(%r15)
	mov	%r12, R64_R12(%r15)
	mov	%r13, R64_R13(%r15)
	mov	%r14, R64_R14(%r15)

	/* cr2 is significant only for page-faults */
	mov	%cr2, %rax
	mov	%rax, R64_CR2(%r15)

	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
	mov	R64_CS(%r15), %esi	/* %esi := cs for later */

	jmp	L_common_dispatch
L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	pushq	%rax
	leaq	HNDL_ALLTRAPS(%rip), %rax
	movq	%rax, ISF64_TRAPFN+8(%rsp)
	popq	%rax
	movq	$(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp)
	jmp	L_dispatch_U64

L_32bit_entry_check:
	/*
	 * Check we're not a confused 64-bit user.
	 */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	jne	L_64bit_entry_reject
	/* fall through to 32-bit handler: */
L_dispatch_U32: /* 32-bit user task */
	subq	$(ISS64_OFFSET), %rsp
	mov	%rsp, %r15		/* %r15 := saved state */
	mov	%gs:CPU_KERNEL_STACK, %rsp
	movl	$(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	movl	%ds, R32_DS(%r15)
	movl	%es, R32_ES(%r15)
	movl	%fs, R32_FS(%r15)
	movl	%gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov	%eax, R32_EAX(%r15)
	mov	%ebx, R32_EBX(%r15)
	mov	%ecx, R32_ECX(%r15)
	mov	%edx, R32_EDX(%r15)
	mov	%ebp, R32_EBP(%r15)
	mov	%esi, R32_ESI(%r15)
	mov	%edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	mov	%cr2, %rax
	mov	%eax, R32_CR2(%r15)

	/*
	 * Copy registers already saved in the machine state
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov	R64_RIP(%r15), %eax
	mov	%eax, R32_EIP(%r15)
	mov	R64_RFLAGS(%r15), %eax
	mov	%eax, R32_EFLAGS(%r15)
	mov	R64_RSP(%r15), %eax
	mov	%eax, R32_UESP(%r15)
	mov	R64_SS(%r15), %eax
	mov	%eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov	R64_CS(%r15), %esi	/* %esi := %cs for later */
	mov	%esi, R32_CS(%r15)
	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	%ebx, R32_TRAPNO(%r15)
	mov	R64_ERR(%r15), %eax
	mov	%eax, R32_ERR(%r15)
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
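
	/*
	 * Common dispatch, reached from both the 64-bit and 32-bit save
	 * paths above, with:
	 *	%r15	saved state area
	 *	%ebx	trap number
	 *	%rdx	trap handler function
	 *	%esi	CS at the time of the trap (tested below for user/kernel)
	 */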
L_common_dispatch:
	cld				/* Ensure the direction flag is clear in the kernel */
	cmpl	$0, EXT(pmap_smap_enabled)(%rip)
	je	1f
	clac				/* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * On entering the kernel, we typically don't switch CR3
	 * because the kernel shares the user's address space.
	 * But we mark the kernel's cr3 as "active" for TLB coherency evaluation.
	 * If, however, the CPU's invalid TLB flag is set, we have to invalidate the TLB
	 * since the kernel pagetables were changed while we were in userspace.
	 *
	 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
	 * we switch to the kernel's address space on entry. Also,
	 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov	%gs:CPU_KERNEL_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	test	$3, %esi			/* user/kernel? */
	jz	2f				/* skip cr3 reload from kernel */
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	1f
	cmpl	$0, EXT(no_shared_cr3)(%rip)
	je	2f
1:
	xor	%eax, %eax
	movw	%gs:CPU_KERNEL_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3			/* load kernel cr3 */
	jmp	4f				/* and skip tlb flush test */
2:
	mov	%gs:CPU_ACTIVE_CR3+4, %rcx
	shr	$32, %rcx
	testl	%ecx, %ecx			/* invalid TLB flag set? */
	jz	4f
	testl	$(1<<16), %ecx			/* Global? */
	jz	3f
	movl	$0, %gs:CPU_TLB_INVALID
	mov	%cr4, %rcx	/* RMWW CR4, for lack of an alternative */
	and	$(~CR4_PGE), %rcx
	mov	%rcx, %cr4			/* clear PGE: flush all TLB entries */
	or	$(CR4_PGE), %rcx
	mov	%rcx, %cr4			/* restore PGE */
	jmp	4f
3:
	movb	$0, %gs:CPU_TLB_INVALID_LOCAL
4:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap */
	cmpq	$0, TH_PCB_IDS(%rcx)		/* Is there a debug register state? */
	je	5f
	xor	%ecx, %ecx			/* If so, reset DR7 (the control) */
	mov	%rcx, %dr7
5:
	incl	%gs:hwIntCnt(,%ebx,4)		// Bump the trap/intr count
	/* Dispatch the designated handler */
	jmp	*%rdx
/*
 * Control is passed here to return to user.
 */
Entry(return_to_user)
	TIME_TRAP_UEXIT

Entry(ret_to_user)
// XXX 'Be nice to tidy up this debug register restore sequence...
	mov	%gs:CPU_ACTIVE_THREAD, %rdx
	movq	TH_PCB_IDS(%rdx),%rax	/* Obtain this thread's debug state */

	test	%rax, %rax		/* Is there a debug register context? */
	je	2f			/* branch if not */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
	jne	1f
	movl	DS_DR0(%rax), %ecx	/* If so, load the 32 bit DRs */
	movq	%rcx, %dr0
	movl	DS_DR1(%rax), %ecx
	movq	%rcx, %dr1
	movl	DS_DR2(%rax), %ecx
	movq	%rcx, %dr2
	movl	DS_DR3(%rax), %ecx
	movq	%rcx, %dr3
	movl	DS_DR7(%rax), %ecx
	movq	%rcx, %gs:CPU_DR7
	jmp	2f
1:
	mov	DS64_DR0(%rax), %rcx	/* Load the full width DRs */
	mov	%rcx, %dr0
	mov	DS64_DR1(%rax), %rcx
	mov	%rcx, %dr1
	mov	DS64_DR2(%rax), %rcx
	mov	%rcx, %dr2
	mov	DS64_DR3(%rax), %rcx
	mov	%rcx, %dr3
	mov	DS64_DR7(%rax), %rcx
	mov	%rcx, %gs:CPU_DR7
2:
	/*
	 * On exiting the kernel there's typically no need to switch cr3 since we're
	 * already running in the user's address space which includes the
	 * kernel. We now mark the task's cr3 as active, for TLB coherency.
	 * If the target address space has a pagezero mapping present, or
	 * if no_shared_cr3 is set, we do need to switch cr3 at this point.
	 */
	mov	%gs:CPU_TASK_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	L_cr3_switch_island
	movl	EXT(no_shared_cr3)(%rip), %eax
	test	%eax, %eax		/* -no_shared_cr3 */
	jnz	L_cr3_switch_island
L_cr3_switch_return:
	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register? */
	cmp	$0, %rax
	je	4f
	mov	%rax, %dr7		/* Set DR7 */
4:
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	L_64bit_return

L_32bit_return:
#if DEBUG_IDT64
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit state? */
	je	1f
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */
	/*
	 * Restore registers into the machine state for iret.
	 * Here on fault stack and PCB address in R15.
	 */
	movl	R32_EIP(%r15), %eax
	movl	%eax, R64_RIP(%r15)
	movl	R32_EFLAGS(%r15), %eax
	movl	%eax, R64_RFLAGS(%r15)
	movl	R32_CS(%r15), %eax
	movl	%eax, R64_CS(%r15)
	movl	R32_UESP(%r15), %eax
	movl	%eax, R64_RSP(%r15)
	movl	R32_SS(%r15), %eax
	movl	%eax, R64_SS(%r15)
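
	/*
	 * The 32-bit values are copied back into the 64-bit ISF because the
	 * actual return to the thread is performed from the 64-bit frame
	 * layout (via iretq or sysexit) below.
	 */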
	/*
	 * Restore general 32-bit registers
	 */
	movl	R32_EAX(%r15), %eax
	movl	R32_EBX(%r15), %ebx
	movl	R32_ECX(%r15), %ecx
	movl	R32_EDX(%r15), %edx
	movl	R32_EBP(%r15), %ebp
	movl	R32_ESI(%r15), %esi
	movl	R32_EDI(%r15), %edi

	/*
	 * Restore segment registers. A segment exception taken here will
	 * push state on the IST1 stack and will not affect the "PCB stack".
	 */
	mov	%r15, %rsp		/* Set the PCB as the stack */
	swapgs
EXT(ret32_set_ds):
	movl	R32_DS(%rsp), %ds
EXT(ret32_set_es):
	movl	R32_ES(%rsp), %es
EXT(ret32_set_fs):
	movl	R32_FS(%rsp), %fs
EXT(ret32_set_gs):
	movl	R32_GS(%rsp), %gs
	/* pop compat frame + trapno, trapfn and error */
	add	$(ISS64_OFFSET)+8+8+8, %rsp
	cmpl	$(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
					/* test for fast entry/exit */
	je	L_fast_exit
EXT(ret32_iret):
	iretq				/* return from interrupt */

	/*
	 * Fast exit via sysexit: sysexit returns to 32-bit user mode with
	 * %edx as the new EIP and %ecx as the new ESP, so both are loaded
	 * from the frame here.
	 */
L_fast_exit:
	pop	%rdx			/* user return eip */
	pop	%rcx			/* pop and toss cs */
	andl	$(~EFL_IF), (%rsp)	/* clear interrupts enable, sti below */
	popf				/* flags - carry denotes failure */
	pop	%rcx			/* user return esp */
	sti				/* interrupts enabled after sysexit */
	sysexitl			/* 32-bit sysexit */
L_cr3_switch_island:
	xor	%eax, %eax
	movw	%gs:CPU_ACTIVE_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3
	jmp	L_cr3_switch_return
Entry(ret_to_kernel)
#if DEBUG_IDT64
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	1f
	CCALL1(panic_idt64, %r15)
1:
	cmpl	$(KERNEL64_CS), R64_CS(%r15)
	je	2f
	CCALL1(panic_idt64, %r15)
2:
#endif /* DEBUG_IDT64 */
L_64bit_return:
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	mov	R64_R14(%r15), %r14
	mov	R64_R13(%r15), %r13
	mov	R64_R12(%r15), %r12
	mov	R64_R11(%r15), %r11
	mov	R64_R10(%r15), %r10
	mov	R64_R9(%r15),  %r9
	mov	R64_R8(%r15),  %r8
	mov	R64_RSI(%r15), %rsi
	mov	R64_RDI(%r15), %rdi
	mov	R64_RBP(%r15), %rbp
	mov	R64_RDX(%r15), %rdx
	mov	R64_RCX(%r15), %rcx
	mov	R64_RBX(%r15), %rbx
	mov	R64_RAX(%r15), %rax
	/*
	 * We must swap GS base if we're returning to user-space,
	 * or we're returning from an NMI that occurred in a trampoline
	 * before the user GS had been swapped. In the latter case, the NMI
	 * handler will have flagged the high-order 32-bits of the CS.
	 */
	cmpq	$(KERNEL64_CS), R64_CS(%r15)
	jz	1f
	swapgs
1:
	mov	R64_R15(%r15), %rsp
	xchg	%r15, %rsp
	add	$(ISS64_OFFSET)+24, %rsp	/* pop saved state */
						/* + trapno/trapfn/error */
	cmpl	$(SYSCALL_CS),ISF64_CS-24(%rsp)
					/* test for fast entry/exit */
	je	L_sysret
EXT(ret64_iret):
	iretq				/* return from interrupt */
L_sysret:
	/*
	 * Here to load rcx/r11/rsp and perform the sysret back to user-space.
	 *	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	mov	ISF64_RIP-24(%rsp), %rcx
	mov	ISF64_RFLAGS-24(%rsp), %r11
	mov	ISF64_RSP-24(%rsp), %rsp
	sysretq				/* return from system call */
/*
 * System call handlers.
 * These are entered via a syscall interrupt. The system call number in %rax
 * is saved to the error code slot in the stack frame. We then branch to the
 * common state saving code.
 */

#ifndef	UNIX_INT
#error NO UNIX INT!!!
#endif
Entry(idt64_unix_scall)
	swapgs				/* switch to kernel gs (cpu_data) */
	pushq	%rax			/* save system call number */
	PUSH_FUNCTION(HNDL_UNIX_SCALL)
	pushq	$(UNIX_INT)
	jmp	L_32bit_entry_check
Entry(idt64_mach_scall)
	swapgs				/* switch to kernel gs (cpu_data) */
	pushq	%rax			/* save system call number */
	PUSH_FUNCTION(HNDL_MACH_SCALL)
	pushq	$(MACH_INT)
	jmp	L_32bit_entry_check
Entry(idt64_mdep_scall)
	swapgs				/* switch to kernel gs (cpu_data) */
	pushq	%rax			/* save system call number */
	PUSH_FUNCTION(HNDL_MDEP_SCALL)
	pushq	$(MACHDEP_INT)
	jmp	L_32bit_entry_check

/* Programmed into MSR_IA32_LSTAR by mp_desc.c */
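/*
 * Note: the syscall instruction has already placed the user RIP in %rcx and
 * the user RFLAGS in %r11 before control arrives here, which is why those
 * registers are stored into the frame's rip/rflags slots below.
 */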
Entry(idt64_syscall)
	swapgs				/* Kapow! get per-cpu data area */
	mov	%rsp, %gs:CPU_UBER_TMP	/* save user stack */
	mov	%gs:CPU_UBER_ISF, %rsp	/* switch stack to pcb */

	/*
	 * Save values in the ISF frame in the PCB
	 * to cons up the saved machine state.
	 */
	movl	$(USER_DS), ISF64_SS(%rsp)
	movl	$(SYSCALL_CS), ISF64_CS(%rsp)	/* cs - a pseudo-segment */
	mov	%r11, ISF64_RFLAGS(%rsp)	/* rflags */
	mov	%rcx, ISF64_RIP(%rsp)		/* rip */
	mov	%gs:CPU_UBER_TMP, %rcx
	mov	%rcx, ISF64_RSP(%rsp)		/* user stack */
	mov	%rax, ISF64_ERR(%rsp)		/* err/rax - syscall code */
	movq	$(T_SYSCALL), ISF64_TRAPNO(%rsp)	/* trapno */
	leaq	HNDL_SYSCALL(%rip), %r11
	movq	%r11, ISF64_TRAPFN(%rsp)
	mov	ISF64_RFLAGS(%rsp), %r11	/* Avoid leak, restore R11 */
	jmp	L_dispatch_U64			/* this can only be 64-bit */
/*
 * sysenter entry point
 * Requires user code to set up:
 *	edx: user instruction pointer (return address)
 *	ecx: user stack pointer
 *		on which is pushed stub ret addr and saved ebx
 * Return to user-space is made using sysexit.
 * Note: sysenter/sysexit cannot be used for calls returning a value in edx,
 *	 or requiring ecx to be preserved.
 */
Entry(idt64_sysenter)
	movq	(%rsp), %rsp
	/*
	 * Push values on to the PCB stack
	 * to cons up the saved machine state.
	 */
	push	$(USER_DS)		/* ss */
	push	%rcx			/* uesp */
	pushf				/* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flags bit;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push	$0
	popf
	push	$(SYSENTER_CS)		/* cs */
L_sysenter_continue:
	swapgs				/* switch to kernel gs (cpu_data) */
	push	%rdx			/* eip */
	push	%rax			/* err/eax - syscall code */
	PUSH_FUNCTION(HNDL_SYSENTER)
	pushq	$(T_SYSENTER)
	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp	L_32bit_entry_check
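
	/*
	 * The EFL_IF OR above records interrupts as enabled in the saved
	 * flags: sysenter clears IF on entry, but the thread entered with
	 * interrupts on and must return that way.
	 */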
Entry(idt64_page_fault)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	push	$(T_PAGE_FAULT)
	push	%rax			/* save %rax temporarily */
	testb	$3, 8+ISF64_CS(%rsp)	/* was trap from kernel? */
	jz	L_kernel_trap		/* - yes, handle with care */
	pop	%rax			/* restore %rax, swapgs, and continue */
	swapgs
	jmp	L_dispatch_user
/*
 * Debug trap.  Check for single-stepping across system call into
 * kernel.  If this is the case, taking the debug trap has turned
 * off single-stepping - save the flags register with the trace
 * bit set.
 */
Entry(idt64_debug)
	push	$0			/* error code */
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_DEBUG)

	testb	$3, ISF64_CS(%rsp)
	jnz	L_dispatch

	/*
	 * trap came from kernel mode
	 */

	push	%rax			/* save %rax temporarily */
	lea	EXT(idt64_sysenter)(%rip), %rax
	cmp	%rax, ISF64_RIP+8(%rsp)
	pop	%rax			/* restore %rax */
	jne	L_dispatch_kernel
	/*
	 * Interrupt stack frame has been pushed on the temporary stack.
	 * We have to switch to pcb stack and patch up the saved state.
	 */
	mov	%rcx, ISF64_ERR(%rsp)	/* save %rcx in error slot */
	mov	ISF64_SS+8(%rsp), %rcx	/* top of temp stack -> pcb stack */
	xchg	%rcx, %rsp		/* switch to pcb stack */
	push	$(USER_DS)		/* ss */
	push	ISF64_ERR(%rcx)		/* saved %rcx into rsp slot */
	push	ISF64_RFLAGS(%rcx)	/* rflags */
	push	$(SYSENTER_TF_CS)	/* cs - not SYSENTER_CS for iret path */
	mov	ISF64_ERR(%rcx), %rcx	/* restore %rcx */
	jmp	L_sysenter_continue	/* continue sysenter entry */
Entry(idt64_double_fault)
	PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch_kernel
/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack and we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_gen_prot)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_GENERAL_PROTECTION)
	jmp	trap_check_kernel_exit	/* check for kernel exit sequence */

Entry(idt64_stack_fault)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_STACK_FAULT)
	jmp	trap_check_kernel_exit	/* check for kernel exit sequence */

Entry(idt64_segnp)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_SEGMENT_NOT_PRESENT)
					/* indicate fault type */
trap_check_kernel_exit:
	testb	$3, ISF64_CS(%rsp)
	jz	L_kernel_gpf
	/* Here for fault from user-space. Copy interrupt state to PCB. */
	swapgs
	push	%rax				/* save %rax: scratch for the copy */
	mov	%rcx, %gs:CPU_UBER_TMP		/* save user RCX */
	mov	%gs:CPU_UBER_ISF, %rcx		/* PCB stack addr */
	mov	ISF64_SS+8(%rsp), %rax
	mov	%rax, ISF64_SS(%rcx)
	mov	ISF64_RSP+8(%rsp), %rax
	mov	%rax, ISF64_RSP(%rcx)
	mov	ISF64_RFLAGS+8(%rsp), %rax
	mov	%rax, ISF64_RFLAGS(%rcx)
	mov	ISF64_CS+8(%rsp), %rax
	mov	%rax, ISF64_CS(%rcx)
	mov	ISF64_RIP+8(%rsp), %rax
	mov	%rax, ISF64_RIP(%rcx)
	mov	ISF64_ERR+8(%rsp), %rax
	mov	%rax, ISF64_ERR(%rcx)
	mov	ISF64_TRAPFN+8(%rsp), %rax
	mov	%rax, ISF64_TRAPFN(%rcx)
	mov	ISF64_TRAPNO+8(%rsp), %rax
	mov	%rax, ISF64_TRAPNO(%rcx)
	pop	%rax				/* restore user RAX */
	mov	%gs:CPU_UBER_TMP, %rsp		/* user RCX into RSP */
	xchg	%rcx, %rsp			/* to PCB stack with user RCX */
	/* The frame is now on the PCB stack; handle as a fault from user */
	jmp	L_dispatch_user
L_kernel_gpf:
	/* Here for GPF from kernel_space. Check for recoverable cases. */
	push	%rax
	leaq	EXT(ret32_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret64_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret32_set_ds)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_es)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_fs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_gs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
/*
 * Here after taking an unexpected trap from kernel mode - perhaps
 * while running in the trampolines hereabouts.
 * Note: %rax has been pushed on stack.
 * Make sure we're not on the PCB stack, if so move to the kernel stack.
 * This is likely a fatal condition.
 * But first, ensure we have the kernel gs base active...
 */
L_kernel_trap:
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* read kernel gsbase */
	test	$0x80000000, %edx		/* test MSB of address */
	jne	1f
	swapgs					/* user gsbase was active, so swap */
1:
	movq	%gs:CPU_UBER_ISF, %rax		/* PCB stack addr */
	subq	%rsp, %rax
	cmpq	$(PAGE_SIZE), %rax		/* current stack in PCB? */
	jb	2f				/* - yes, deal with it */
	pop	%rax				/* - no, restore %rax */
	jmp	L_dispatch_kernel
2:
	/*
	 * Here if %rsp is in the PCB.
	 * Copy the interrupt stack frame from PCB stack to kernel stack.
	 */
	movq	%gs:CPU_KERNEL_STACK, %rax
	xchgq	%rax, %rsp
	pushq	8+ISF64_SS(%rax)
	pushq	8+ISF64_RSP(%rax)
	pushq	8+ISF64_RFLAGS(%rax)
	pushq	8+ISF64_CS(%rax)
	pushq	8+ISF64_RIP(%rax)
	pushq	8+ISF64_ERR(%rax)
	pushq	8+ISF64_TRAPFN(%rax)
	pushq	8+ISF64_TRAPNO(%rax)
	movq	(%rax), %rax			/* restore saved %rax */
	jmp	L_dispatch_kernel
/*
 * GP/NP fault on IRET: CS or SS is in error.
 * User GSBASE is active.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *  0	ISF64_TRAPNO:	trap code (NP or GP)
 *  8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	ISF64_RIP:	kernel RIP
 *  32	ISF64_CS:	kernel CS
 *  40	ISF64_RFLAGS:	kernel RFLAGS
 *  48	ISF64_RSP:	kernel RSP
 *  56	ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *  0	user RIP
 *  8	user CS
 *  16	user RFLAGS
 *  24	user RSP
 *  32	user SS
 *
 * We need to move the kernel's TRAPNO, TRAPFN and ERR to the PCB and handle
 * as a user fault with:
 *  0	ISF64_TRAPNO:	trap code (NP or GP)
 *  8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	user RIP
 *  32	user CS
 *  40	user RFLAGS
 *  48	user RSP
 *  56	user SS
 */
L_fault_iret:
	pop	%rax			/* recover saved %rax */
	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don't need saved rip) */
	mov	ISF64_RSP(%rsp), %rax
	xchg	%rax, %rsp		/* switch to PCB stack */
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push	ISF64_TRAPNO(%rax)
	mov	ISF64_RIP(%rax), %rax	/* restore rax */
					/* now treat as fault from user */
	jmp	L_dispatch
/*
 * Fault restoring a segment register. All of the saved state is still
 * on the stack untouched since we haven't yet moved the stack pointer.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *  0	ISF64_TRAPNO:	trap code (NP or GP)
 *  8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	ISF64_RIP:	kernel RIP
 *  32	ISF64_CS:	kernel CS
 *  40	ISF64_RFLAGS:	kernel RFLAGS
 *  48	ISF64_RSP:	kernel RSP
 *  56	ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *  0	user trap code
 *  8	user trap function
 *  16	user err
 *  24	user RIP
 *  32	user CS
 *  40	user RFLAGS
 *  48	user RSP
 *  56	user SS
 */
L_32bit_fault_set_seg:
	swapgs
	pop	%rax			/* toss saved %rax from stack */
	mov	ISF64_TRAPNO(%rsp), %rax
	mov	ISF64_TRAPFN(%rsp), %rcx
	mov	ISF64_ERR(%rsp), %rdx
	mov	ISF64_RSP(%rsp), %rsp	/* reset stack to saved state */
	mov	%rax,R64_TRAPNO(%rsp)
	mov	%rcx,R64_TRAPFN(%rsp)
	mov	%rdx,R64_ERR(%rsp)
					/* now treat as fault from user */
					/* except that all the state is */
					/* already saved - we just have to */
					/* move the trapno and error into */
					/* the compatibility frame */
	jmp	L_dispatch_U32_after_fault
/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch_kernel

Entry(idt64_db_task_stk_fault)
	PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch_kernel
Entry(idt64_mc64)
	push	$(0)			/* Error */
	PUSH_FUNCTION(HNDL_MACHINE_CHECK)
	pushq	$(T_MACHINE_CHECK)
	jmp	L_dispatch_kernel
/*
 * NMI interrupt.
 * This may or may not be fatal but extreme care is required
 * because it may fall when control was already in another trampoline.
 *
 * We get here on IST2 stack which is used for NMIs only.
 * We must be aware of the interrupted state:
 *  - from user-space, we
 *	- copy state to the PCB and continue;
 *  - from kernel-space, we
 *	- copy state to the kernel stack and continue, but
 *	- check what GSBASE was active, set the kernel base and
 *	- ensure that the active state is restored when the NMI is dismissed.
 */
Entry(idt64_nmi)
	push	%rax				/* save RAX to ISF64_ERR */
	push	%rcx				/* save RCX to ISF64_TRAPFN */
	push	%rdx				/* save RDX to ISF64_TRAPNO */
	testb	$3, ISF64_CS(%rsp)		/* NMI from user-space? */
	je	1f

	/* From user-space: copy interrupt state to user PCB */
	swapgs
	mov	%gs:CPU_UBER_ISF, %rcx		/* PCB stack addr */
	add	$(ISF64_SIZE), %rcx		/* adjust to base of ISF */
	swapgs					/* swap back for L_dispatch */
	jmp	4f				/* Copy state to PCB */
1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS is set.
	 * Set the kernel and ensure that we'll swap back correctly at IRET.
	 */
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* read kernel gsbase */
	test	$0x80000000, %edx		/* test MSB of address */
	jne	2f
	swapgs					/* user gsbase was active, so swap */
	movl	$1, ISF64_CS+4(%rsp)		/* and set flag in CS slot */
2:
	/*
	 * Determine whether we're on the kernel or interrupt stack
	 * when the NMI hit.
	 */
	mov	ISF64_RSP(%rsp), %rcx
	mov	%gs:CPU_KERNEL_STACK, %rax
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax			/* are we on the kernel stack? */
	je	3f

	mov	%gs:CPU_INT_STACK_TOP, %rax
	dec	%rax				/* intr stack top is byte above max */
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax			/* are we on the interrupt stack? */
	je	3f

	mov	%gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and	$0xFFFFFFFFFFFFFFF0, %rcx
4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 * ISF64_ERR(RSP)    saved RAX
	 * ISF64_TRAPFN(RSP) saved RCX
	 * ISF64_TRAPNO(RSP) saved RDX
	 */
	xchg	%rsp, %rcx			/* set for pushes */
	push	ISF64_SS(%rcx)
	push	ISF64_RSP(%rcx)
	push	ISF64_RFLAGS(%rcx)
	push	ISF64_CS(%rcx)
	push	ISF64_RIP(%rcx)
	push	$(0)				/* error code 0 */
	lea	HNDL_ALLINTRS(%rip), %rax
	push	%rax				/* trapfn allintrs */
	push	$(T_NMI)			/* trapno T_NMI */
	mov	ISF64_ERR(%rcx), %rax
	mov	ISF64_TRAPNO(%rcx), %rdx
	mov	ISF64_TRAPFN(%rcx), %rcx
	jmp	L_dispatch
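
	/*
	 * The NMI frame has now been rebuilt on the target stack in the same
	 * layout the normal stubs produce, so it can be dispatched like any
	 * other interrupt.
	 */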
/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 *
 * The rest of the state is set up as:
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov	%esi, %eax
	testb	$3, %al
	jz	trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap/exception */
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15)			/* call user trap routine */
	/* user_trap() unmasks interrupts */
	cli					/* hold off intrs - critical section */
	xorl	%ecx, %ecx			/* don't check if we're in the PFZ */
Entry(return_from_trap)
	movq	%gs:CPU_ACTIVE_THREAD,%r15	/* Get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%r15)	/* Reset IO tier override to -1 before returning to userspace */
	cmpl	$0, TH_RWLOCK_COUNT(%r15)	/* Check if current thread has pending RW locks held */
	jz	1f
	xorq	%rbp, %rbp			/* clear framepointer */
	mov	%r15, %rdi			/* Set RDI to current thread */
	CCALL(lck_rw_clear_promotions_x86)	/* Clear promotions if needed */
1:
	movq	TH_PCB_ISS(%r15), %r15		/* PCB stack */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax
	je	EXT(return_to_user)		/* branch if no AST */
L_return_from_trap_with_ast:
	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
	je	2f			/* no, go handle the AST */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* are we a 64-bit task? */
	je	1f
	/* no... 32-bit user mode */
	movl	R32_EIP(%r15), %edi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R32_EBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
1:
	movq	R64_RIP(%r15), %rdi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R64_RBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
2:
	sti				/* interrupts always enabled on return to user mode */

	xor	%edi, %edi		/* zero %rdi */
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(i386_astintr)		/* take the AST */

	cli
	mov	%rsp, %r15		/* AST changes stack, saved state */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
	jmp	EXT(return_from_trap)	/* and check again (rare) */
/*
 * Trap from kernel mode.  No need to switch stacks.
 * Interrupts must be off here - we will set them to state at time of trap
 * as soon as it's safe for us to do so and not recurse doing preemption.
 */
trap_from_kernel:
	movq	%r15, %rdi			/* saved state addr */
	pushq	R64_RIP(%r15)			/* Simulate a CALL from fault point */
	pushq	%rbp				/* Extend framepointer chain */
	movq	%rsp, %rbp
	CCALLWITHSP(kernel_trap)		/* to kernel trap routine */
	popq	%rbp
	addq	$8, %rsp
	mov	%rsp, %r15			/* DTrace slides stack/saved-state */
	cli

	movl	%gs:CPU_PENDING_AST,%eax	/* get pending asts */
	testl	$(AST_URGENT),%eax		/* any urgent preemption? */
	je	ret_to_kernel			/* no, nothing to do */
	cmpl	$(T_PREEMPT),R64_TRAPNO(%r15)
	je	ret_to_kernel			/* T_PREEMPT handled in kernel_trap() */
	testl	$(EFL_IF),R64_RFLAGS(%r15)	/* interrupts disabled? */
	je	ret_to_kernel
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel
	movq	%gs:CPU_KERNEL_STACK,%rax
	movq	%rsp,%rcx
	xorq	%rax,%rcx
	andq	EXT(kernel_stack_mask)(%rip),%rcx
	testq	%rcx,%rcx			/* are we on the kernel stack? */
	jne	ret_to_kernel			/* no, skip it */

	CCALL1(i386_astintr, $1)		/* take the AST */

	mov	%rsp, %r15			/* AST changes stack, saved state */
	jmp	ret_to_kernel
/*
 * All interrupts on all tasks enter here with:
 *	r15	x86_saved_state_t
 *	rsp	kernel or interrupt stack
 *	esi	cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)
	/*
	 * test whether already on interrupt stack
	 */
	movq	%gs:CPU_INT_STACK_TOP,%rcx
	cmpq	%rsp,%rcx
	jb	1f
	leaq	-INTSTACK_SIZE(%rcx),%rdx
	cmpq	%rsp,%rdx
	jb	int_from_intstack
1:
	xchgq	%rcx,%rsp		/* switch to interrupt stack */

	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
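
	/*
	 * With CR0.TS set, any FPU/SSE use at interrupt level raises a
	 * device-not-available fault, so the interrupted thread's floating
	 * point state is preserved lazily rather than saved eagerly here.
	 */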
	pushq	%rcx			/* save pointer to old stack */
	pushq	%gs:CPU_INT_STATE	/* save previous intr state */
	movq	%r15,%gs:CPU_INT_STATE	/* set intr state */

	TIME_INT_ENTRY			/* do timing */

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15)		/* call generic interrupt routine */
	.globl	EXT(return_to_iret)
LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL

	TIME_INT_EXIT			/* do timing */

	popq	%gs:CPU_INT_STATE	/* reset/clear intr state pointer */
	popq	%rsp			/* switch back to old stack */

	movq	%gs:CPU_ACTIVE_THREAD,%rax
	movq	TH_PCB_FPS(%rax),%rax	/* get pcb's ifps */
	cmpq	$0,%rax			/* Is there a context */
	je	1f			/* Branch if not */
	movl	FP_VALID(%rax),%eax	/* Load fp_valid */
	cmpl	$0,%eax			/* Check if valid */
	jne	1f			/* Branch if valid */
	clts				/* Clear TS */
	jmp	2f
1:
	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl	R32_CS(%r15),%eax	/* assume 32-bit state */
	cmpl	$(SS_64),SS_FLAVOR(%r15)	/* 64-bit? */
#if DEBUG_IDT64
	jne	4f
	movl	R64_CS(%r15),%eax	/* 64-bit user mode */
	jmp	3f
4:
	cmpl	$(SS_32),SS_FLAVOR(%r15)
	je	3f
	CCALL1(panic_idt64, %r15)
	hlt
#else
	jne	3f
	movl	R64_CS(%r15),%eax	/* 64-bit user mode */
#endif
3:
	testb	$3,%al			/* user mode, */
	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
	/*
	 * we only want to handle preemption requests if
	 * the interrupt fell in the kernel context
	 * and preemption isn't disabled
	 */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	$(AST_URGENT),%eax		/* any urgent requests? */
	je	ret_to_kernel			/* no, nothing to do */

	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel			/* yes, skip it */

	/*
	 * Take an AST from kernel space.  We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL1(i386_astintr, $1)

	mov	%rsp, %r15			/* AST changes stack, saved state */
	jmp	ret_to_kernel
/*
 * nested int - simple path, can't preempt etc on way out
 */
int_from_intstack:
	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL
	incl	%gs:CPU_NESTED_ISTACK

	push	%gs:CPU_INT_STATE
	mov	%r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop	%gs:CPU_INT_STATE

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL
	decl	%gs:CPU_NESTED_ISTACK

	jmp	ret_to_kernel
/*
 * Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax			/* pending ASTs? */
	je	EXT(ret_to_user)		/* no, nothing to do */

	TIME_TRAP_UENTRY

	movl	$1, %ecx			/* check if we're in the PFZ */
	jmp	L_return_from_trap_with_ast	/* return */
/* Syscall dispatch routines! */

/*
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	x86_saved_state32_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
	movl	R32_EAX(%r15),%eax
	testl	%eax,%eax
	js	EXT(hndl_mach_scall)		/* < 0 => mach */
						/* > 0 => unix */
Entry(hndl_unix_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movq	TH_TASK(%rcx),%rbx		/* point to current task */
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */
Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movq	TH_TASK(%rcx),%rbx		/* point to current task */
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */
Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movq	TH_TASK(%rcx),%rbx		/* point to current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */
/*
 * System call entries via syscall only:
 *
 *	r15	x86_saved_state64_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling syscall */
	movq	TH_TASK(%rcx),%rbx		/* point to current task */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)
	/*
	 * We can be here either for a mach, unix, machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
	movl	R64_RAX(%r15), %eax		/* syscall number/class */
	movl	%eax, %edx
	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mach_scall64)
	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_unix_scall64)
	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mdep_scall64)
	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_diag_scall64)
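
	/*
	 * The class is encoded in the high-order bits of the 64-bit syscall
	 * number (SYSCALL_CLASS_MASK/SHIFT from mach/i386/syscall_sw.h); a
	 * number matching none of the classes above is raised as an exception.
	 */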
	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
Entry(hndl_unix_scall64)
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_mach_scall64)
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */
Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15)		// Call diagnostics
	test	%eax, %eax			// What kind of return is this?
	je	1f				// - branch if bad (zero)
	jmp	EXT(return_to_user)		// Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
Entry(hndl_machine_check)
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	CCALL1(panic_double_fault64, %r15)
	hlt