*/
#include <i386/asm.h>
#include <assym.s>
-#include <mach_kdb.h>
+#include <debug.h>
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
* syscall - synchronous system call request
* fatal - fatal traps
*/
-
/*
- * Handlers:
+ * Indices of handlers for each exception type.
*/
-#define HNDL_ALLINTRS EXT(hndl_allintrs)
-#define HNDL_ALLTRAPS EXT(hndl_alltraps)
-#define HNDL_SYSENTER EXT(hndl_sysenter)
-#define HNDL_SYSCALL EXT(hndl_syscall)
-#define HNDL_UNIX_SCALL EXT(hndl_unix_scall)
-#define HNDL_MACH_SCALL EXT(hndl_mach_scall)
-#define HNDL_MDEP_SCALL EXT(hndl_mdep_scall)
-#define HNDL_DIAG_SCALL EXT(hndl_diag_scall)
-#define HNDL_DOUBLE_FAULT EXT(hndl_double_fault)
-#define HNDL_MACHINE_CHECK EXT(hndl_machine_check)
-
-
-#if 1
-#define PUSH_FUNCTION(func) \
- sub $8, %rsp ;\
- push %rax ;\
- leaq func(%rip), %rax ;\
- movq %rax, 8(%rsp) ;\
- pop %rax
-#else
-#define PUSH_FUNCTION(func) pushq func
-#endif
+#define HNDL_ALLINTRS 0
+#define HNDL_ALLTRAPS 1
+#define HNDL_SYSENTER 2
+#define HNDL_SYSCALL 3
+#define HNDL_UNIX_SCALL 4
+#define HNDL_MACH_SCALL 5
+#define HNDL_MDEP_SCALL 6
+#define HNDL_DOUBLE_FAULT 7
+#define HNDL_MACHINE_CHECK 8
+
+/* Begin double-mapped descriptor section */
+
+.section __HIB, __desc
+.globl EXT(idt64_hndl_table0)
+EXT(idt64_hndl_table0):
+ .quad EXT(ks_dispatch)
+ .quad EXT(ks_64bit_return)
+ .quad 0 /* Populated with CPU shadow displacement*/
+ .quad EXT(ks_return)
+
+EXT(idt64_hndl_table1):
+ .quad EXT(hndl_allintrs)
+ .quad EXT(hndl_alltraps)
+ .quad EXT(hndl_sysenter)
+ .quad EXT(hndl_syscall)
+ .quad EXT(hndl_unix_scall)
+ .quad EXT(hndl_mach_scall)
+ .quad EXT(hndl_mdep_scall)
+ .quad EXT(hndl_double_fault)
+ .quad EXT(hndl_machine_check)
+.text
+
/* The wrapper for all non-special traps/interrupts */
/* Everything up to PUSH_FUNCTION is just to output
push %rax ;\
POSTCODE2(0x6400+n) ;\
pop %rax ;\
- PUSH_FUNCTION(f) ;\
+ pushq $(f) ;\
pushq $(n) ;\
jmp L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f) \
- PUSH_FUNCTION(f) ;\
+ pushq $(f) ;\
pushq $(n) ;\
jmp L_dispatch
#endif
/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
-#define TRAP_IST(n, f)
+#define TRAP_IST1(n, f)
+#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)
+/* Begin double-mapped text section */
+.section __HIB, __text
/* Generate all the stubs */
#include "idt_table.h"
+Entry(idt64_page_fault)
+ pushq $(HNDL_ALLTRAPS)
+ push $(T_PAGE_FAULT)
+ jmp L_dispatch
+
+Entry(idt64_debug)
+ push $0 /* error code */
+ pushq $(HNDL_ALLTRAPS)
+ pushq $(T_DEBUG)
+ jmp L_dispatch
+/*
+ * Legacy interrupt gate System call handlers.
+ * These are entered via a syscall interrupt. The system call number in %rax
+ * is saved to the error code slot in the stack frame. We then branch to the
+ * common state saving code.
+ */
+
+#ifndef UNIX_INT
+#error NO UNIX INT!!!
+#endif
+Entry(idt64_unix_scall)
+ pushq %rax /* save system call number */
+ pushq $(HNDL_UNIX_SCALL)
+ pushq $(UNIX_INT)
+ jmp L_dispatch
+
+Entry(idt64_mach_scall)
+ pushq %rax /* save system call number */
+ pushq $(HNDL_MACH_SCALL)
+ pushq $(MACH_INT)
+ jmp L_dispatch
+
+Entry(idt64_mdep_scall)
+ pushq %rax /* save system call number */
+ pushq $(HNDL_MDEP_SCALL)
+ pushq $(MACHDEP_INT)
+ jmp L_dispatch
+
+/*
+ * For GP/NP/SS faults, we use the IST1 stack.
+ * For faults from user-space, we have to copy the machine state to the
+ * PCB stack and then dispatch as normal.
+ * For faults in kernel-space, we need to scrub for kernel exit faults and
+ * treat these as user-space faults. But for all other kernel-space faults
+ * we continue to run on the IST1 stack and we dispatch to handle the fault
+ * as fatal.
+ */
+Entry(idt64_gen_prot)
+ pushq $(HNDL_ALLTRAPS)
+ pushq $(T_GENERAL_PROTECTION)
+ jmp L_dispatch
+
+Entry(idt64_stack_fault)
+ pushq $(HNDL_ALLTRAPS)
+ pushq $(T_STACK_FAULT)
+ jmp L_dispatch
+
+Entry(idt64_segnp)
+ pushq $(HNDL_ALLTRAPS)
+ pushq $(T_SEGMENT_NOT_PRESENT)
+ jmp L_dispatch
+
+/*
+ * Fatal exception handlers:
+ */
+Entry(idt64_db_task_dbl_fault)
+ pushq $(HNDL_DOUBLE_FAULT)
+ pushq $(T_DOUBLE_FAULT)
+ jmp L_dispatch
+
+Entry(idt64_db_task_stk_fault)
+ pushq $(HNDL_DOUBLE_FAULT)
+ pushq $(T_STACK_FAULT)
+ jmp L_dispatch
+
+Entry(idt64_mc)
+ push $(0) /* Error */
+ pushq $(HNDL_MACHINE_CHECK)
+ pushq $(T_MACHINE_CHECK)
+ jmp L_dispatch
+
+/*
+ * NMI
+ * This may or may not be fatal but extreme care is required
+ * because it may arrive while control is already in another trampoline.
+ *
+ * We get here on IST2 stack which is used for NMIs only.
+ */
+Entry(idt64_nmi)
+ push %rax /* save RAX to ISF64_ERR */
+ push %rcx /* save RCX to ISF64_TRAPFN */
+ push %rdx /* save RDX to ISF64_TRAPNO */
+ jmp L_dispatch
+
+Entry(idt64_double_fault)
+ pushq $(HNDL_DOUBLE_FAULT)
+ pushq $(T_DOUBLE_FAULT)
+ jmp L_dispatch
+
+Entry(hi64_syscall)
+Entry(idt64_syscall)
+ swapgs
+	/* Use RAX as a temporary by shifting its contents into R11[32:63].
+	 * The system call number is defined to be a 32-bit quantity, as is
+	 * RFLAGS.
+	 */
+ shlq $32, %rax
+ or %rax, %r11
+.globl EXT(dblsyscall_patch_point)
+EXT(dblsyscall_patch_point):
+// movabsq $0x12345678ABCDEFFFULL, %rax
+ /* Generate offset to the double-mapped per-CPU data shadow
+ * into RAX
+ */
+ leaq EXT(idt64_hndl_table0)(%rip), %rax
+ mov 16(%rax), %rax
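+	/* %rax now holds the CPU shadow displacement (idt64_hndl_table0[2]),
+	 * applied as an offset to the %gs-relative per-CPU accesses below.
+	 */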
+ mov %rsp, %gs:CPU_UBER_TMP(%rax) /* save user stack */
+ mov %gs:CPU_ESTACK(%rax), %rsp /* switch stack to per-cpu estack */
+ sub $(ISF64_SIZE), %rsp
+
+ /*
+ * Synthesize an ISF frame on the exception stack
+ */
+ movl $(USER_DS), ISF64_SS(%rsp)
+ mov %rcx, ISF64_RIP(%rsp) /* rip */
+
+ mov %gs:CPU_UBER_TMP(%rax), %rcx
+ mov %rcx, ISF64_RSP(%rsp) /* user stack --changed */
+
+ mov %r11, %rax
+ shrq $32, %rax /* Restore RAX */
+ mov %r11d, %r11d /* Clear r11[32:63] */
+
+ mov %r11, ISF64_RFLAGS(%rsp) /* rflags */
+ movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */
+ mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */
+ movq $(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
+ movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */
+ swapgs
+ jmp L_dispatch /* this can only be 64-bit */
+
+Entry(hi64_sysenter)
+Entry(idt64_sysenter)
+ /* Synthesize an interrupt stack frame onto the
+ * exception stack.
+ */
+ push $(USER_DS) /* ss */
+ push %rcx /* uesp */
+ pushf /* flags */
+ /*
+ * Clear, among others, the Nested Task (NT) flags bit;
+ * this is zeroed by INT, but not by SYSENTER.
+ */
+ push $0
+ popf
+ push $(SYSENTER_CS) /* cs */
+L_sysenter_continue:
+ push %rdx /* eip */
+ push %rax /* err/eax - syscall code */
+ pushq $(HNDL_SYSENTER)
+ pushq $(T_SYSENTER)
+ orl $(EFL_IF), ISF64_RFLAGS(%rsp)
+ jmp L_dispatch
+
/*
* Common dispatch point.
* Determine what mode has been interrupted and save state accordingly.
+ * Here with:
+ * rsp from user-space: interrupt state in PCB, or
+ * from kernel-space: interrupt state in kernel or interrupt stack
+ * GSBASE from user-space: pthread area, or
+ * from kernel-space: cpu_data
*/
-L_dispatch:
- cmpl $(KERNEL64_CS), ISF64_CS(%rsp)
- je L_64bit_dispatch
+L_dispatch:
+ pushq %rax
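+	/* CS.RPL is non-zero for traps from user mode; the +8 accounts for
+	 * the %rax just pushed above the ISF.
+	 */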
+ testb $3, 8+ISF64_CS(%rsp)
+ jz 1f
+ swapgs
+ leaq EXT(idt64_hndl_table0)(%rip), %rax
+ mov 16(%rax), %rax
+
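+	/* Switch to the task's kernel-visible CR3; the user CR3 (CPU_UCR3)
+	 * is reinstated on the exit path below.
+	 */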
+ mov %gs:CPU_TASK_CR3(%rax), %rax
+ mov %rax, %cr3
+#if DEBUG
+ mov %rax, %gs:CPU_ENTRY_CR3
+#endif
+1:
+ /* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
+ leaq EXT(idt64_hndl_table0)(%rip), %rax
+ /* Indirect branch to non-doublemapped trampolines */
+ jmp *(%rax)
+/* User return: register restoration and address space switch sequence */
+Entry(ks_64bit_return)
+ mov R64_R14(%r15), %r14
+ mov R64_R13(%r15), %r13
+ mov R64_R12(%r15), %r12
+ mov R64_R11(%r15), %r11
+ mov R64_R10(%r15), %r10
+ mov R64_R9(%r15), %r9
+ mov R64_R8(%r15), %r8
+ mov R64_RSI(%r15), %rsi
+ mov R64_RDI(%r15), %rdi
+ mov R64_RBP(%r15), %rbp
+ mov R64_RDX(%r15), %rdx
+ mov R64_RCX(%r15), %rcx
+ mov R64_RBX(%r15), %rbx
+ mov R64_RAX(%r15), %rax
+ /* Switch to per-CPU exception stack */
+ mov %gs:CPU_ESTACK, %rsp
+
+ /* Synthesize interrupt stack frame from PCB savearea to exception stack */
+ push R64_SS(%r15)
+ push R64_RSP(%r15)
+ push R64_RFLAGS(%r15)
+ push R64_CS(%r15)
+ push R64_RIP(%r15)
+
+ mov R64_R15(%r15), %r15
+ cmpq $(KERNEL64_CS), 8(%rsp)
+ jz 1f
+ /* Discover user cr3/ASID */
+ push %rax
+ mov %gs:CPU_UCR3, %rax
+#if DEBUG
+ mov %rax, %gs:CPU_EXIT_CR3
+#endif
+ mov %rax, %cr3
+ /* Continue execution on the shared/doublemapped trampoline */
+ pop %rax
swapgs
-
+1:
+ cmpl $(SYSCALL_CS), 8(%rsp) /* test for exit via SYSRET */
+ je L_sysret
+EXT(ret64_iret):
+ iretq /* return from interrupt */
+L_sysret:
/*
- * Check for trap from EFI32, and restore cr3 and rsp if so.
- * A trap from EFI32 is fatal.
+ * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
+ * rcx user rip
+ * r11 user rflags
+ * rsp user stack pointer
*/
- cmpl $(KERNEL32_CS), ISF64_CS(%rsp)
- jne L_dispatch_continue
- push %rcx
- mov EXT(pal_efi_saved_cr3)(%rip), %rcx
- mov %rcx, %cr3
- leaq 0(%rip), %rcx
- shr $32, %rcx /* splice the upper 32-bits of rip */
- shl $32, %rsp /* .. and the lower 32-bits of rsp */
- shrd $32, %rcx, %rsp /* to recover the full 64-bits of rsp */
pop %rcx
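+	/* discard the CS slot; SYSRET reloads CS and SS from the IA32_STAR MSR */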
+ add $8, %rsp
+ pop %r11
+ pop %rsp
+ sysretq /* return from system call */
+/* End of double-mapped TEXT */
+.text
+
+Entry(ks_dispatch)
+ popq %rax
+ cmpl $(KERNEL64_CS), ISF64_CS(%rsp)
+ je EXT(ks_dispatch_kernel)
+
+ mov %rax, %gs:CPU_UBER_TMP
+ mov %gs:CPU_UBER_ISF, %rax
+ add $(ISF64_SIZE), %rax
-L_dispatch_continue:
+ xchg %rsp, %rax
+/* Memory-to-memory moves (ain't x86 wonderful):
+ * Transfer the exception frame from the per-CPU exception stack to the
+ * 'PCB' stack programmed at cswitch.
+ */
+ push ISF64_SS(%rax)
+ push ISF64_RSP(%rax)
+ push ISF64_RFLAGS(%rax)
+ push ISF64_CS(%rax)
+ push ISF64_RIP(%rax)
+ push ISF64_ERR(%rax)
+ push ISF64_TRAPFN(%rax)
+ push ISF64_TRAPNO(%rax)
+ mov %gs:CPU_UBER_TMP, %rax
+ jmp EXT(ks_dispatch_user)
+
+Entry(ks_return)
+ jmp .
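+	/* spin in place if this table slot is ever reached; appears to be a placeholder */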
+
+Entry(ks_dispatch_user)
cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
- je L_32bit_dispatch /* 32-bit user task */
- /* fall through to 64bit user dispatch */
+ je L_dispatch_U32 /* 32-bit user task */
+
+L_dispatch_U64:
+ subq $(ISS64_OFFSET), %rsp
+ mov %r15, R64_R15(%rsp)
+ mov %rsp, %r15
+ mov %gs:CPU_KERNEL_STACK, %rsp
+ jmp L_dispatch_64bit
+
+Entry(ks_dispatch_kernel)
+ subq $(ISS64_OFFSET), %rsp
+ mov %r15, R64_R15(%rsp)
+ mov %rsp, %r15
/*
* Here for 64-bit user task or kernel
*/
-L_64bit_dispatch:
- subq $(ISS64_OFFSET), %rsp
- movl $(SS_64), SS_FLAVOR(%rsp)
+L_dispatch_64bit:
+ movl $(SS_64), SS_FLAVOR(%r15)
- cld
-
/*
 * Save segment regs - for completeness since they're not used.
*/
- mov %fs, R64_FS(%rsp)
- mov %gs, R64_GS(%rsp)
+ movl %fs, R64_FS(%r15)
+ movl %gs, R64_GS(%r15)
/* Save general-purpose registers */
- mov %rax, R64_RAX(%rsp)
- mov %rcx, R64_RCX(%rsp)
- mov %rbx, R64_RBX(%rsp)
- mov %rbp, R64_RBP(%rsp)
- mov %r11, R64_R11(%rsp)
- mov %r12, R64_R12(%rsp)
- mov %r13, R64_R13(%rsp)
- mov %r14, R64_R14(%rsp)
- mov %r15, R64_R15(%rsp)
+ mov %rax, R64_RAX(%r15)
+ mov %rbx, R64_RBX(%r15)
+ mov %rcx, R64_RCX(%r15)
+ mov %rdx, R64_RDX(%r15)
+ mov %rbp, R64_RBP(%r15)
+ mov %rdi, R64_RDI(%r15)
+ mov %rsi, R64_RSI(%r15)
+ mov %r8, R64_R8(%r15)
+ mov %r9, R64_R9(%r15)
+ mov %r10, R64_R10(%r15)
+ mov %r11, R64_R11(%r15)
+ mov %r12, R64_R12(%r15)
+ mov %r13, R64_R13(%r15)
+ mov %r14, R64_R14(%r15)
/* cr2 is significant only for page-faults */
mov %cr2, %rax
- mov %rax, R64_CR2(%rsp)
-
- /* Other registers (which may contain syscall args) */
- mov %rdi, R64_RDI(%rsp) /* arg0 .. */
- mov %rsi, R64_RSI(%rsp)
- mov %rdx, R64_RDX(%rsp)
- mov %r10, R64_R10(%rsp)
- mov %r8, R64_R8(%rsp)
- mov %r9, R64_R9(%rsp) /* .. arg5 */
+ mov %rax, R64_CR2(%r15)
- mov R64_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */
- mov R64_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */
- mov R64_CS(%rsp), %esi /* %esi := cs for later */
+ mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
+ mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */
+ mov R64_CS(%r15), %esi /* %esi := cs for later */
- jmp L_common_dispatch
+ jmp L_common_dispatch
L_64bit_entry_reject:
/*
* Here for a 64-bit user attempting an invalid kernel entry.
*/
- pushq %rax
- leaq HNDL_ALLTRAPS(%rip), %rax
- movq %rax, ISF64_TRAPFN+8(%rsp)
- popq %rax
+ movq $(HNDL_ALLTRAPS), ISF64_TRAPFN(%rsp)
movq $(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp)
- jmp L_64bit_dispatch
+ jmp L_dispatch_U64
-L_32bit_entry_check:
+Entry(ks_32bit_entry_check)
/*
* Check we're not a confused 64-bit user.
*/
jne L_64bit_entry_reject
/* fall through to 32-bit handler: */
-L_32bit_dispatch: /* 32-bit user task */
- subq $(ISC32_OFFSET), %rsp
- movl $(SS_32), SS_FLAVOR(%rsp)
+L_dispatch_U32: /* 32-bit user task */
+ subq $(ISS64_OFFSET), %rsp
+ mov %rsp, %r15
+ mov %gs:CPU_KERNEL_STACK, %rsp
+ movl $(SS_32), SS_FLAVOR(%r15)
- cld
/*
* Save segment regs
*/
- mov %ds, R32_DS(%rsp)
- mov %es, R32_ES(%rsp)
- mov %fs, R32_FS(%rsp)
- mov %gs, R32_GS(%rsp)
+ movl %ds, R32_DS(%r15)
+ movl %es, R32_ES(%r15)
+ movl %fs, R32_FS(%r15)
+ movl %gs, R32_GS(%r15)
/*
* Save general 32-bit registers
*/
- mov %eax, R32_EAX(%rsp)
- mov %ebx, R32_EBX(%rsp)
- mov %ecx, R32_ECX(%rsp)
- mov %edx, R32_EDX(%rsp)
- mov %ebp, R32_EBP(%rsp)
- mov %esi, R32_ESI(%rsp)
- mov %edi, R32_EDI(%rsp)
+ mov %eax, R32_EAX(%r15)
+ mov %ebx, R32_EBX(%r15)
+ mov %ecx, R32_ECX(%r15)
+ mov %edx, R32_EDX(%r15)
+ mov %ebp, R32_EBP(%r15)
+ mov %esi, R32_ESI(%r15)
+ mov %edi, R32_EDI(%r15)
/* Unconditionally save cr2; only meaningful on page faults */
mov %cr2, %rax
- mov %eax, R32_CR2(%rsp)
+ mov %eax, R32_CR2(%r15)
/*
* Copy registers already saved in the machine state
* (in the interrupt stack frame) into the compat save area.
*/
- mov ISC32_RIP(%rsp), %eax
- mov %eax, R32_EIP(%rsp)
- mov ISC32_RFLAGS(%rsp), %eax
- mov %eax, R32_EFLAGS(%rsp)
- mov ISC32_CS(%rsp), %esi /* %esi := %cs for later */
-
- mov %esi, R32_CS(%rsp)
- mov ISC32_RSP(%rsp), %eax
- mov %eax, R32_UESP(%rsp)
- mov ISC32_SS(%rsp), %eax
- mov %eax, R32_SS(%rsp)
-L_32bit_dispatch_after_fault:
- mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */
- mov %ebx, R32_TRAPNO(%rsp)
- mov ISC32_ERR(%rsp), %eax
- mov %eax, R32_ERR(%rsp)
- mov ISC32_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */
+ mov R64_RIP(%r15), %eax
+ mov %eax, R32_EIP(%r15)
+ mov R64_RFLAGS(%r15), %eax
+ mov %eax, R32_EFLAGS(%r15)
+ mov R64_RSP(%r15), %eax
+ mov %eax, R32_UESP(%r15)
+ mov R64_SS(%r15), %eax
+ mov %eax, R32_SS(%r15)
+L_dispatch_U32_after_fault:
+ mov R64_CS(%r15), %esi /* %esi := %cs for later */
+ mov %esi, R32_CS(%r15)
+ mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
+ mov %ebx, R32_TRAPNO(%r15)
+ mov R64_ERR(%r15), %eax
+ mov %eax, R32_ERR(%r15)
+ mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */
L_common_dispatch:
+ cld /* Ensure the direction flag is clear in the kernel */
+ cmpl $0, EXT(pmap_smap_enabled)(%rip)
+ je 1f
+ clac /* Clear EFLAGS.AC if SMAP is present/enabled */
+1:
/*
- * On entering the kernel, we don't need to switch cr3
+ * On entering the kernel, we typically don't switch CR3
* because the kernel shares the user's address space.
- * But we mark the kernel's cr3 as "active".
- * If, however, the invalid cr3 flag is set, we have to flush tlbs
- * since the kernel's mapping was changed while we were in userspace.
+	 * But we mark the kernel's cr3 as "active" for TLB coherency evaluation.
+ * If, however, the CPU's invalid TLB flag is set, we have to invalidate the TLB
+ * since the kernel pagetables were changed while we were in userspace.
*
- * But: if global no_shared_cr3 is TRUE we do switch to the kernel's cr3
+ * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
+ * we switch to the kernel's address space on entry. Also,
+ * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
* so that illicit accesses to userspace can be trapped.
*/
mov %gs:CPU_KERNEL_CR3, %rcx
mov %rcx, %gs:CPU_ACTIVE_CR3
test $3, %esi /* user/kernel? */
- jz 1f /* skip cr3 reload from kernel */
+ jz 2f /* skip cr3 reload from kernel */
xor %rbp, %rbp
+ cmpl $0, %gs:CPU_PAGEZERO_MAPPED
+ jnz 11f
cmpl $0, EXT(no_shared_cr3)(%rip)
- je 1f
+ je 2f
+11:
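+	/* merge the kernel PCID into bits 11:0 of the CR3 value before loading it */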
+ xor %eax, %eax
+ movw %gs:CPU_KERNEL_PCID, %ax
+ or %rax, %rcx
mov %rcx, %cr3 /* load kernel cr3 */
- jmp 2f /* and skip tlb flush test */
-1:
+ jmp 4f /* and skip tlb flush test */
+2:
mov %gs:CPU_ACTIVE_CR3+4, %rcx
shr $32, %rcx
testl %ecx, %ecx
- jz 2f
+ jz 4f
movl $0, %gs:CPU_TLB_INVALID
- testl $(1<<16), %ecx /* Global? */
- jz 11f
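+	/* Toggle CR4.PGE off and on to flush the entire TLB, including global entries */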
mov %cr4, %rcx /* RMWW CR4, for lack of an alternative*/
and $(~CR4_PGE), %rcx
mov %rcx, %cr4
or $(CR4_PGE), %rcx
mov %rcx, %cr4
- jmp 2f
-
-11: mov %cr3, %rcx
- mov %rcx, %cr3
-2:
+4:
mov %gs:CPU_ACTIVE_THREAD, %rcx /* Get the active thread */
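+	/* a null active thread (possible very early in boot) skips the per-thread work below */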
+ testq %rcx, %rcx
+ je 5f
+ movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap */
cmpq $0, TH_PCB_IDS(%rcx) /* Is there a debug register state? */
- je 3f
- mov $0, %rcx /* If so, reset DR7 (the control) */
+ je 5f
+ xor %ecx, %ecx /* If so, reset DR7 (the control) */
mov %rcx, %dr7
-3:
+5:
incl %gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count
/* Dispatch the designated handler */
- jmp *%rdx
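+	/* %rdx holds the HNDL_* index saved in the TRAPFN slot; use it to
+	 * index the handler address table.
+	 */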
+ leaq EXT(idt64_hndl_table1)(%rip), %rax
+ jmp *(%rax, %rdx, 8)
/*
* Control is passed here to return to user.
mov %gs:CPU_ACTIVE_THREAD, %rdx
movq TH_PCB_IDS(%rdx),%rax /* Obtain this thread's debug state */
- cmpq $0,%rax /* Is there a debug register context? */
+ test %rax, %rax /* Is there a debug register context? */
je 2f /* branch if not */
cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
jne 1f
mov %rcx, %gs:CPU_DR7
2:
/*
- * On exiting the kernel there's no need to switch cr3 since we're
+ * On exiting the kernel there's typically no need to switch cr3 since we're
* already running in the user's address space which includes the
- * kernel. Nevertheless, we now mark the task's cr3 as active.
- * But, if no_shared_cr3 is set, we do need to switch cr3 at this point.
+ * kernel. We now mark the task's cr3 as active, for TLB coherency.
+ * If the target address space has a pagezero mapping present, or
+ * if no_shared_cr3 is set, we do need to switch cr3 at this point.
*/
mov %gs:CPU_TASK_CR3, %rcx
mov %rcx, %gs:CPU_ACTIVE_CR3
+ cmpl $0, %gs:CPU_PAGEZERO_MAPPED
+ jnz L_cr3_switch_island
movl EXT(no_shared_cr3)(%rip), %eax
test %eax, %eax /* -no_shared_cr3 */
- jz 3f
- mov %rcx, %cr3
-3:
+ jnz L_cr3_switch_island
+
+L_cr3_switch_return:
mov %gs:CPU_DR7, %rax /* Is there a debug control register?*/
cmp $0, %rax
je 4f
mov %rax, %dr7 /* Set DR7 */
movq $0, %gs:CPU_DR7
4:
- cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */
+ cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
je L_64bit_return
L_32bit_return:
#if DEBUG_IDT64
- cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? */
+ cmpl $(SS_32), SS_FLAVOR(%r15) /* 32-bit state? */
je 1f
cli
POSTCODE2(0x6432)
- CCALL1(panic_idt64, %rsp)
+ CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */
/*
* Restore registers into the machine state for iret.
+	 * Here on fault stack and PCB address in R15.
*/
- movl R32_EIP(%rsp), %eax
- movl %eax, ISC32_RIP(%rsp)
- movl R32_EFLAGS(%rsp), %eax
- movl %eax, ISC32_RFLAGS(%rsp)
- movl R32_CS(%rsp), %eax
- movl %eax, ISC32_CS(%rsp)
- movl R32_UESP(%rsp), %eax
- movl %eax, ISC32_RSP(%rsp)
- movl R32_SS(%rsp), %eax
- movl %eax, ISC32_SS(%rsp)
-
+ movl R32_EIP(%r15), %eax
+ movl %eax, R64_RIP(%r15)
+ movl R32_EFLAGS(%r15), %eax
+ movl %eax, R64_RFLAGS(%r15)
+ movl R32_CS(%r15), %eax
+ movl %eax, R64_CS(%r15)
+ movl R32_UESP(%r15), %eax
+ movl %eax, R64_RSP(%r15)
+ movl R32_SS(%r15), %eax
+ movl %eax, R64_SS(%r15)
+
+ /* Validate DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
+ /* Exempt "known good" statically configured selectors, e.g. USER_DS and 0 */
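+	/* LAR sets ZF when the selector is valid; invalid selectors are
+	 * replaced with a known-good value before segment reload.
+	 */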
+ cmpl $(USER_DS), R32_DS(%r15)
+ jz 22f
+ cmpl $0, R32_DS(%r15)
+ jz 22f
+ larw R32_DS(%r15), %ax
+ jz 22f
+ movl $(USER_DS), R32_DS(%r15)
+22:
+ cmpl $(USER_DS), R32_ES(%r15)
+ jz 33f
+ cmpl $0, R32_ES(%r15)
+ jz 33f
+ larw R32_ES(%r15), %ax
+ jz 33f
+ movl $(USER_DS), R32_ES(%r15)
+33:
+ cmpl $(USER_DS), R32_FS(%r15)
+ jz 44f
+ cmpl $0, R32_FS(%r15)
+ jz 44f
+ larw R32_FS(%r15), %ax
+ jz 44f
+ movl $(USER_DS), R32_FS(%r15)
+44:
+ cmpl $(USER_CTHREAD), R32_GS(%r15)
+ jz 55f
+ cmpl $0, R32_GS(%r15)
+ jz 55f
+ larw R32_GS(%r15), %ax
+ jz 55f
+ movl $(USER_CTHREAD), R32_GS(%r15)
+55:
/*
* Restore general 32-bit registers
*/
- movl R32_EAX(%rsp), %eax
- movl R32_EBX(%rsp), %ebx
- movl R32_ECX(%rsp), %ecx
- movl R32_EDX(%rsp), %edx
- movl R32_EBP(%rsp), %ebp
- movl R32_ESI(%rsp), %esi
- movl R32_EDI(%rsp), %edi
+ movl R32_EAX(%r15), %eax
+ movl R32_EBX(%r15), %ebx
+ movl R32_ECX(%r15), %ecx
+ movl R32_EDX(%r15), %edx
+ movl R32_EBP(%r15), %ebp
+ movl R32_ESI(%r15), %esi
+ movl R32_EDI(%r15), %edi
/*
- * Restore segment registers. We make take an exception here but
- * we've got enough space left in the save frame area to absorb
- * a hardware frame plus the trapfn and trapno
+ * Restore segment registers. A segment exception taken here will
+ * push state on the IST1 stack and will not affect the "PCB stack".
*/
+ mov %r15, %rsp /* Set the PCB as the stack */
swapgs
+
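+	/* clear the 64-bit-only registers; they are not part of the 32-bit user state */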
+ xor %r8, %r8
+ xor %r9, %r9
+ xor %r10, %r10
+ xor %r11, %r11
+ xor %r12, %r12
+ xor %r13, %r13
+ xor %r14, %r14
+ xor %r15, %r15
+
EXT(ret32_set_ds):
movw R32_DS(%rsp), %ds
EXT(ret32_set_es):
movw R32_GS(%rsp), %gs
/* pop compat frame + trapno, trapfn and error */
- add $(ISC32_OFFSET)+8+8+8, %rsp
- cmp $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
+ add $(ISS64_OFFSET)+8+8+8, %rsp
+ cmpl $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
/* test for fast entry/exit */
- je L_fast_exit
+ je L_fast_exit
EXT(ret32_iret):
- iretq /* return from interrupt */
+ iretq /* return from interrupt */
L_fast_exit:
pop %rdx /* user return eip */
popf /* flags - carry denotes failure */
pop %rcx /* user return esp */
sti /* interrupts enabled after sysexit */
- sysexit /* 32-bit sysexit */
+ sysexitl /* 32-bit sysexit */
+
+L_cr3_switch_island:
+ xor %eax, %eax
+ movw %gs:CPU_ACTIVE_PCID, %ax
+ or %rax, %rcx
+ mov %rcx, %cr3
+ jmp L_cr3_switch_return
ret_to_kernel:
#if DEBUG_IDT64
- cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */
+ cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
je 1f
cli
POSTCODE2(0x6464)
- CCALL1(panic_idt64, %rsp)
+ CCALL1(panic_idt64, %r15)
hlt
1:
- cmpl $(KERNEL64_CS), R64_CS(%rsp)
+ cmpl $(KERNEL64_CS), R64_CS(%r15)
je 2f
- CCALL1(panic_idt64, %rsp)
+ CCALL1(panic_idt64, %r15)
hlt
2:
#endif
L_64bit_return:
- testb $3, R64_CS(%rsp) /* returning to user-space? */
- jz 1f
- swapgs
-1:
-
- /*
- * Restore general 64-bit registers
- */
- mov R64_R15(%rsp), %r15
- mov R64_R14(%rsp), %r14
- mov R64_R13(%rsp), %r13
- mov R64_R12(%rsp), %r12
- mov R64_R11(%rsp), %r11
- mov R64_R10(%rsp), %r10
- mov R64_R9(%rsp), %r9
- mov R64_R8(%rsp), %r8
- mov R64_RSI(%rsp), %rsi
- mov R64_RDI(%rsp), %rdi
- mov R64_RBP(%rsp), %rbp
- mov R64_RDX(%rsp), %rdx
- mov R64_RBX(%rsp), %rbx
- mov R64_RCX(%rsp), %rcx
- mov R64_RAX(%rsp), %rax
-
- add $(ISS64_OFFSET)+24, %rsp /* pop saved state frame +
- trapno + trapfn and error */
- cmpl $(SYSCALL_CS),ISF64_CS-24(%rsp)
- /* test for fast entry/exit */
- je L_sysret
-.globl _dump_iretq
-EXT(ret64_iret):
- iretq /* return from interrupt */
-
-L_sysret:
- /*
- * Here to load rcx/r11/rsp and perform the sysret back to user-space.
- * rcx user rip
- * r1 user rflags
- * rsp user stack pointer
- */
- mov ISF64_RIP-24(%rsp), %rcx
- mov ISF64_RFLAGS-24(%rsp), %r11
- mov ISF64_RSP-24(%rsp), %rsp
- sysretq /* return from systen call */
-
-
-
-/*
- * System call handlers.
- * These are entered via a syscall interrupt. The system call number in %rax
- * is saved to the error code slot in the stack frame. We then branch to the
- * common state saving code.
- */
-
-#ifndef UNIX_INT
-#error NO UNIX INT!!!
-#endif
-Entry(idt64_unix_scall)
- swapgs /* switch to kernel gs (cpu_data) */
- pushq %rax /* save system call number */
- PUSH_FUNCTION(HNDL_UNIX_SCALL)
- pushq $(UNIX_INT)
- jmp L_32bit_entry_check
-
-
-Entry(idt64_mach_scall)
- swapgs /* switch to kernel gs (cpu_data) */
- pushq %rax /* save system call number */
- PUSH_FUNCTION(HNDL_MACH_SCALL)
- pushq $(MACH_INT)
- jmp L_32bit_entry_check
-
-
-Entry(idt64_mdep_scall)
- swapgs /* switch to kernel gs (cpu_data) */
- pushq %rax /* save system call number */
- PUSH_FUNCTION(HNDL_MDEP_SCALL)
- pushq $(MACHDEP_INT)
- jmp L_32bit_entry_check
-
-
-Entry(idt64_diag_scall)
- swapgs /* switch to kernel gs (cpu_data) */
- push %rax /* save system call number */
- PUSH_FUNCTION(HNDL_DIAG_SCALL)
- pushq $(DIAG_INT)
- jmp L_32bit_entry_check
-
-Entry(hi64_syscall)
-Entry(idt64_syscall)
-L_syscall_continue:
- swapgs /* Kapow! get per-cpu data area */
- mov %rsp, %gs:CPU_UBER_TMP /* save user stack */
- mov %gs:CPU_UBER_ISF, %rsp /* switch stack to pcb */
-
/*
- * Save values in the ISF frame in the PCB
- * to cons up the saved machine state.
+ * Restore general 64-bit registers.
+ * Here on fault stack and PCB address in R15.
*/
- movl $(USER_DS), ISF64_SS(%rsp)
- movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */
- mov %r11, ISF64_RFLAGS(%rsp) /* rflags */
- mov %rcx, ISF64_RIP(%rsp) /* rip */
- mov %gs:CPU_UBER_TMP, %rcx
- mov %rcx, ISF64_RSP(%rsp) /* user stack */
- mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */
- movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */
- leaq HNDL_SYSCALL(%rip), %r11;
- movq %r11, ISF64_TRAPFN(%rsp)
- jmp L_64bit_dispatch /* this can only be a 64-bit task */
-
-/*
- * sysenter entry point
- * Requires user code to set up:
- * edx: user instruction pointer (return address)
- * ecx: user stack pointer
- * on which is pushed stub ret addr and saved ebx
- * Return to user-space is made using sysexit.
- * Note: sysenter/sysexit cannot be used for calls returning a value in edx,
- * or requiring ecx to be preserved.
- */
-Entry(hi64_sysenter)
-Entry(idt64_sysenter)
- movq (%rsp), %rsp
- /*
- * Push values on to the PCB stack
- * to cons up the saved machine state.
- */
- push $(USER_DS) /* ss */
- push %rcx /* uesp */
- pushf /* flags */
- /*
- * Clear, among others, the Nested Task (NT) flags bit;
- * this is zeroed by INT, but not by SYSENTER.
- */
- push $0
- popf
- push $(SYSENTER_CS) /* cs */
-L_sysenter_continue:
- swapgs /* switch to kernel gs (cpu_data) */
- push %rdx /* eip */
- push %rax /* err/eax - syscall code */
- PUSH_FUNCTION(HNDL_SYSENTER)
- pushq $(T_SYSENTER)
- orl $(EFL_IF), ISF64_RFLAGS(%rsp)
- jmp L_32bit_entry_check
-
-
-Entry(idt64_page_fault)
- PUSH_FUNCTION(HNDL_ALLTRAPS)
- push $(T_PAGE_FAULT)
- push %rax /* save %rax temporarily */
- leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax
- cmp %rax, 8+ISF64_RIP(%rsp) /* fault during copy args? */
- je 1f /* - yes, handle copy arg fault */
- testb $3, 8+ISF64_CS(%rsp) /* was trap from kernel? */
- jz L_kernel_trap /* - yes, handle with care */
- pop %rax /* restore %rax, swapgs, and continue */
- swapgs
- jmp L_dispatch_continue
-1:
- add $(8+ISF64_SIZE), %rsp /* remove entire intr stack frame */
- jmp L_copy_args_continue /* continue system call entry */
-
-
-/*
- * Debug trap. Check for single-stepping across system call into
- * kernel. If this is the case, taking the debug trap has turned
- * off single-stepping - save the flags register with the trace
- * bit set.
- */
-Entry(idt64_debug)
- push $0 /* error code */
- PUSH_FUNCTION(HNDL_ALLTRAPS)
- pushq $(T_DEBUG)
-
- testb $3, ISF64_CS(%rsp)
- jnz L_dispatch
+ leaq EXT(idt64_hndl_table0)(%rip), %rax
+ jmp *8(%rax)
+Entry(ks_idt64_debug_kernel)
/*
* trap came from kernel mode
*/
lea EXT(idt64_sysenter)(%rip), %rax
cmp %rax, ISF64_RIP+8(%rsp)
pop %rax
- jne L_dispatch
+ jne EXT(ks_dispatch_kernel)
/*
* Interrupt stack frame has been pushed on the temporary stack.
* We have to switch to pcb stack and patch up the saved state.
mov ISF64_ERR(%rcx),%rcx /* restore %rcx */
jmp L_sysenter_continue /* continue sysenter entry */
+Entry(ks_trap_check_kernel_exit)
+ testb $3,ISF64_CS(%rsp)
+ jz L_kernel_gpf
-Entry(idt64_double_fault)
- PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
- pushq $(T_DOUBLE_FAULT)
-
+ /* Here for fault from user-space. Copy interrupt state to PCB. */
+ swapgs
push %rax
- leaq EXT(idt64_syscall)(%rip), %rax
- cmp %rax, ISF64_RIP+8(%rsp)
+ mov %rcx, %gs:CPU_UBER_TMP /* save user RCX */
+ mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */
+ mov ISF64_SS+8(%rsp), %rax
+ mov %rax, ISF64_SS(%rcx)
+ mov ISF64_RSP+8(%rsp), %rax
+ mov %rax, ISF64_RSP(%rcx)
+ mov ISF64_RFLAGS+8(%rsp), %rax
+ mov %rax, ISF64_RFLAGS(%rcx)
+ mov ISF64_CS+8(%rsp), %rax
+ mov %rax, ISF64_CS(%rcx)
+ mov ISF64_RIP+8(%rsp), %rax
+ mov %rax, ISF64_RIP(%rcx)
+ mov ISF64_ERR+8(%rsp), %rax
+ mov %rax, ISF64_ERR(%rcx)
+ mov ISF64_TRAPFN+8(%rsp), %rax
+ mov %rax, ISF64_TRAPFN(%rcx)
+ mov ISF64_TRAPNO+8(%rsp), %rax
+ mov %rax, ISF64_TRAPNO(%rcx)
pop %rax
- jne L_64bit_dispatch
-
- mov ISF64_RSP(%rsp), %rsp
- jmp L_syscall_continue
-
+ mov %gs:CPU_UBER_TMP, %rsp /* user RCX into RSP */
+ xchg %rcx, %rsp /* to PCB stack with user RCX */
+ jmp EXT(ks_dispatch_user)
-/*
- * General protection or segment-not-present fault.
- * Check for a GP/NP fault in the kernel_return
- * sequence; if there, report it as a GP/NP fault on the user's instruction.
- *
- * rsp-> 0 ISF64_TRAPNO: trap code (NP or GP)
- * 8 ISF64_TRAPFN: trap function
- * 16 ISF64_ERR: segment number in error (error code)
- * 24 ISF64_RIP: rip
- * 32 ISF64_CS: cs
- * 40 ISF64_RFLAGS: rflags
- * 48 ISF64_RIP: rsp
- * 56 ISF64_SS: ss
- * 64: old registers (trap is from kernel)
- */
-Entry(idt64_gen_prot)
- PUSH_FUNCTION(HNDL_ALLTRAPS)
- pushq $(T_GENERAL_PROTECTION)
- jmp trap_check_kernel_exit /* check for kernel exit sequence */
-
-Entry(idt64_stack_fault)
- PUSH_FUNCTION(HNDL_ALLTRAPS)
- pushq $(T_STACK_FAULT)
- jmp trap_check_kernel_exit /* check for kernel exit sequence */
-
-Entry(idt64_segnp)
- PUSH_FUNCTION(HNDL_ALLTRAPS)
- pushq $(T_SEGMENT_NOT_PRESENT)
- /* indicate fault type */
-trap_check_kernel_exit:
- testb $3,ISF64_CS(%rsp)
- jnz L_dispatch
- /*
- * trap was from kernel mode,
- * so check for the kernel exit sequence
- */
+L_kernel_gpf:
+	/* Here for a GPF from kernel space. Check for recoverable cases. */
push %rax
-
leaq EXT(ret32_iret)(%rip), %rax
cmp %rax, 8+ISF64_RIP(%rsp)
je L_fault_iret
leaq EXT(ret32_set_gs)(%rip), %rax
cmp %rax, 8+ISF64_RIP(%rsp)
je L_32bit_fault_set_seg
+ jmp EXT(ks_kernel_trap)
+ /* Fall through */
- leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax
- cmp %rax, 8+ISF64_RIP(%rsp)
- cmove 8+ISF64_RSP(%rsp), %rsp
- je L_copy_args_continue
-
- /* fall through */
-
-L_kernel_trap:
+Entry(ks_kernel_trap)
/*
* Here after taking an unexpected trap from kernel mode - perhaps
* while running in the trampolines hereabouts.
* Note: %rax has been pushed on stack.
* Make sure we're not on the PCB stack, if so move to the kernel stack.
* This is likely a fatal condition.
- * But first, try to ensure we have the kernel gs base active...
+ * But first, ensure we have the kernel gs base active...
*/
- movq %gs:CPU_THIS, %rax /* get gs_base into %rax */
- test %rax, %rax /* test sign bit (MSB) */
- js 1f /* -ve kernel addr, no swap */
- swapgs /* +ve user addr, swap */
+ push %rcx
+ push %rdx
+ mov $(MSR_IA32_GS_BASE), %ecx
+ rdmsr /* read kernel gsbase */
+ test $0x80000000, %edx /* test MSB of address */
+ jne 1f
+ swapgs /* so swap */
1:
+ pop %rdx
+ pop %rcx
+
movq %gs:CPU_UBER_ISF, %rax /* PCB stack addr */
subq %rsp, %rax
cmpq $(PAGE_SIZE), %rax /* current stack in PCB? */
jb 2f /* - yes, deal with it */
pop %rax /* - no, restore %rax */
- jmp L_64bit_dispatch
+ jmp EXT(ks_dispatch_kernel)
2:
/*
* Here if %rsp is in the PCB
pushq 8+ISF64_TRAPFN(%rax)
pushq 8+ISF64_TRAPNO(%rax)
movq (%rax), %rax
- jmp L_64bit_dispatch
+ jmp EXT(ks_dispatch_kernel)
+
/*
* GP/NP fault on IRET: CS or SS is in error.
- * Note that the user ss is originally 16-byte aligned, we'd popped the
- * stack back to contain just the rip/cs/rflags/rsp/ss before issuing the iret.
- * On taking the GP/NP fault on the iret instruction, the stack is 16-byte
- * aligned before pushed the interrupt frame. Hence, an 8-byte padding exists.
+ * User GSBASE is active.
+ * On IST1 stack containing:
+ * (rax saved above, which is immediately popped)
+ * 0 ISF64_TRAPNO: trap code (NP or GP)
+ * 8 ISF64_TRAPFN: trap function
+ * 16 ISF64_ERR: segment number in error (error code)
+ * 24 ISF64_RIP: kernel RIP
+ * 32 ISF64_CS: kernel CS
+ * 40 ISF64_RFLAGS: kernel RFLAGS
+ * 48 ISF64_RSP: kernel RSP
+ * 56 ISF64_SS: kernel SS
+ * On the PCB stack, pointed to by the kernel's RSP is:
+ * 0 user RIP
+ * 8 user CS
+ * 16 user RFLAGS
+ * 24 user RSP
+ * 32 user SS
*
- * on SP is
- * (- rax saved above, which is immediately popped)
+ * We need to move the kernel's TRAPNO, TRAPFN and ERR to the PCB and handle
+ * as a user fault with:
* 0 ISF64_TRAPNO: trap code (NP or GP)
* 8 ISF64_TRAPFN: trap function
* 16 ISF64_ERR: segment number in error (error code)
- * 24 ISF64_RIP: rip
- * 32 ISF64_CS: cs
- * 40 ISF64_RFLAGS: rflags
- * 48 ISF64_RSP: rsp --> new trapno
- * 56 ISF64_SS: ss --> new trapfn
- * 64 pad --> new errcode
- * 72 user rip
- * 80 user cs
- * 88 user rflags
- * 96 user rsp
- * 104 user ss (16-byte aligned)
+ * 24 user RIP
+ * 32 user CS
+ * 40 user RFLAGS
+ * 48 user RSP
+ * 56 user SS
*/
L_fault_iret:
pop %rax /* recover saved %rax */
	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don't need saved rip) */
- mov ISF64_TRAPNO(%rsp), %rax
- mov %rax, ISF64_TRAPNO(%rsp)/* put in user trap number */
- mov ISF64_TRAPFN(%rsp), %rax
- mov %rax, ISF64_SS(%rsp) /* put in user trap function */
- mov ISF64_ERR(%rsp), %rax /* get error code */
- mov %rax, 8+ISF64_SS(%rsp) /* put in user errcode */
- mov ISF64_RIP(%rsp), %rax /* restore rax */
- add $(ISF64_RSP),%rsp /* reset to new trapfn */
+ mov ISF64_RSP(%rsp), %rax
+ xchg %rax, %rsp /* switch to PCB stack */
+ push ISF64_ERR(%rax)
+ push ISF64_TRAPFN(%rax)
+ push ISF64_TRAPNO(%rax)
+ mov ISF64_RIP(%rax), %rax /* restore rax */
/* now treat as fault from user */
jmp L_dispatch
/*
* Fault restoring a segment register. All of the saved state is still
* on the stack untouched since we haven't yet moved the stack pointer.
+ * On IST1 stack containing:
+ * (rax saved above, which is immediately popped)
+ * 0 ISF64_TRAPNO: trap code (NP or GP)
+ * 8 ISF64_TRAPFN: trap function
+ * 16 ISF64_ERR: segment number in error (error code)
+ * 24 ISF64_RIP: kernel RIP
+ * 32 ISF64_CS: kernel CS
+ * 40 ISF64_RFLAGS: kernel RFLAGS
+ * 48 ISF64_RSP: kernel RSP
+ * 56 ISF64_SS: kernel SS
+ * On the PCB stack, pointed to by the kernel's RSP is:
+ * 0 user trap code
+ * 8 user trap function
+ * 16 user err
+ * 24 user RIP
+ * 32 user CS
+ * 40 user RFLAGS
+ * 48 user RSP
+ * 56 user SS
*/
L_32bit_fault_set_seg:
swapgs
mov ISF64_TRAPFN(%rsp), %rcx
mov ISF64_ERR(%rsp), %rdx
mov ISF64_RSP(%rsp), %rsp /* reset stack to saved state */
- mov %rax,ISC32_TRAPNO(%rsp)
- mov %rcx,ISC32_TRAPFN(%rsp)
- mov %rdx,ISC32_ERR(%rsp)
+ mov %rax,R64_TRAPNO(%rsp)
+ mov %rcx,R64_TRAPFN(%rsp)
+ mov %rdx,R64_ERR(%rsp)
/* now treat as fault from user */
/* except that all the state is */
/* already saved - we just have to */
/* move the trapno and error into */
/* the compatibility frame */
- jmp L_32bit_dispatch_after_fault
+ jmp L_dispatch_U32_after_fault
-/*
- * Fatal exception handlers:
- */
-Entry(idt64_db_task_dbl_fault)
- PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
- pushq $(T_DOUBLE_FAULT)
- jmp L_dispatch
+Entry(ks_idt64_nmi_kernel)
+ /* From user-space: copy interrupt state to user PCB */
+ swapgs
+ mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */
+ add $(ISF64_SIZE), %rcx /* adjust to base of ISF */
+ swapgs /* swap back for L_dispatch */
+ jmp 4f /* Copy state to PCB */
-Entry(idt64_db_task_stk_fault)
- PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
- pushq $(T_STACK_FAULT)
- jmp L_dispatch
+1:
+ /*
+ * From kernel-space:
+ * Determine whether the kernel or user GS is set.
+ * Set the kernel and ensure that we'll swap back correctly at IRET.
+ */
+ mov $(MSR_IA32_GS_BASE), %ecx
+ rdmsr /* read kernel gsbase */
+ test $0x80000000, %edx /* test MSB of address */
+ jne 2f
+ swapgs /* so swap */
+ movl $1, ISF64_CS+4(%rsp) /* and set flag in CS slot */
+2:
+ /*
+ * Determine whether we're on the kernel or interrupt stack
+ * when the NMI hit.
+ */
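+	/* XOR the interrupted RSP with each candidate stack and mask with
+	 * kernel_stack_mask: a zero result means RSP lies within that stack.
+	 */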
+ mov ISF64_RSP(%rsp), %rcx
+ mov %gs:CPU_KERNEL_STACK, %rax
+ xor %rcx, %rax
+ and EXT(kernel_stack_mask)(%rip), %rax
+ test %rax, %rax /* are we on the kernel stack? */
+ je 3f /* yes */
+
+ mov %gs:CPU_INT_STACK_TOP, %rax
+ dec %rax /* intr stack top is byte above max */
+ xor %rcx, %rax
+ and EXT(kernel_stack_mask)(%rip), %rax
+ test %rax, %rax /* are we on the interrupt stack? */
+ je 3f /* yes */
+
+ mov %gs:CPU_KERNEL_STACK, %rcx
+3:
+ /* 16-byte-align kernel/interrupt stack for state push */
+ and $0xFFFFFFFFFFFFFFF0, %rcx
-Entry(idt64_mc)
- push $(0) /* Error */
- PUSH_FUNCTION(HNDL_MACHINE_CHECK)
- pushq $(T_MACHINE_CHECK)
- jmp L_dispatch
+4:
+ /*
+ * Copy state from NMI stack (RSP) to the save area (RCX) which is
+ * the PCB for user or kernel/interrupt stack from kernel.
+ * ISF64_ERR(RSP) saved RAX
+ * ISF64_TRAPFN(RSP) saved RCX
+ * ISF64_TRAPNO(RSP) saved RDX
+ */
+ xchg %rsp, %rcx /* set for pushes */
+ push ISF64_SS(%rcx)
+ push ISF64_RSP(%rcx)
+ push ISF64_RFLAGS(%rcx)
+ push ISF64_CS(%rcx)
+ push ISF64_RIP(%rcx)
+ push $(0) /* error code 0 */
+ push $(HNDL_ALLINTRS) /* trapfn allintrs */
+ push $(T_NMI) /* trapno T_NMI */
+ mov ISF64_ERR(%rcx), %rax
+ mov ISF64_TRAPNO(%rcx), %rdx
+ mov ISF64_TRAPFN(%rcx), %rcx
+ jmp L_dispatch
-/* All 'exceptions' enter hndl_alltraps:
- * rsp -> x86_saved_state_t
- * esi cs at trap
+/* All 'exceptions' enter hndl_alltraps, with:
+ * r15 x86_saved_state_t address
+ * rsp kernel stack if user-space, otherwise interrupt or kernel stack
+ * esi cs at trap
*
* The rest of the state is set up as:
+ * both rsp and r15 are 16-byte aligned
* interrupts disabled
* direction flag cleared
*/
/* Check for active vtimers in the current task */
mov %gs:CPU_ACTIVE_THREAD, %rcx
+ movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap/exception */
mov TH_TASK(%rcx), %rbx
TASK_VTIMER_CHECK(%rbx, %rcx)
- movq %rsp, %rdi /* also pass it as arg0 */
- movq %gs:CPU_KERNEL_STACK,%rsp /* switch to kernel stack */
-
- CCALL(user_trap) /* call user trap routine */
+ CCALL1(user_trap, %r15) /* call user trap routine */
/* user_trap() unmasks interrupts */
cli /* hold off intrs - critical section */
xorl %ecx, %ecx /* don't check if we're in the PFZ */
-#define CLI cli
-#define STI sti
Entry(return_from_trap)
- movq %gs:CPU_ACTIVE_THREAD,%rsp
- movq TH_PCB_ISS(%rsp), %rsp /* switch back to PCB stack */
+ movq %gs:CPU_ACTIVE_THREAD,%r15 /* Get current thread */
+ movl $-1, TH_IOTIER_OVERRIDE(%r15) /* Reset IO tier override to -1 before returning to userspace */
+ cmpl $0, TH_RWLOCK_COUNT(%r15) /* Check if current thread has pending RW locks held */
+ jz 1f
+ xorq %rbp, %rbp /* clear framepointer */
+ mov %r15, %rdi /* Set RDI to current thread */
+ CCALL(lck_rw_clear_promotions_x86) /* Clear promotions if needed */
+1:
+ movq TH_PCB_ISS(%r15), %r15 /* PCB stack */
movl %gs:CPU_PENDING_AST,%eax
testl %eax,%eax
- je EXT(return_to_user) /* branch if no AST */
+ je EXT(return_to_user) /* branch if no AST */
L_return_from_trap_with_ast:
- movq %rsp, %r13
- movq %gs:CPU_KERNEL_STACK, %rsp
-
testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */
je 2f /* no, go handle the AST */
- cmpl $(SS_64), SS_FLAVOR(%r13) /* are we a 64-bit task? */
+ cmpl $(SS_64), SS_FLAVOR(%r15) /* are we a 64-bit task? */
je 1f
/* no... 32-bit user mode */
- movl R32_EIP(%r13), %edi
+ movl R32_EIP(%r15), %edi
xorq %rbp, %rbp /* clear framepointer */
CCALL(commpage_is_in_pfz32)
testl %eax, %eax
je 2f /* not in the PFZ... go service AST */
- movl %eax, R32_EBX(%r13) /* let the PFZ know we've pended an AST */
- movq %r13, %rsp /* switch back to PCB stack */
+ movl %eax, R32_EBX(%r15) /* let the PFZ know we've pended an AST */
jmp EXT(return_to_user)
1:
- movq R64_RIP(%r13), %rdi
+ movq R64_RIP(%r15), %rdi
xorq %rbp, %rbp /* clear framepointer */
CCALL(commpage_is_in_pfz64)
testl %eax, %eax
je 2f /* not in the PFZ... go service AST */
- movl %eax, R64_RBX(%r13) /* let the PFZ know we've pended an AST */
- movq %r13, %rsp /* switch back to PCB stack */
+ movl %eax, R64_RBX(%r15) /* let the PFZ know we've pended an AST */
jmp EXT(return_to_user)
-2:
- STI /* interrupts always enabled on return to user mode */
+2:
- xor %edi, %edi /* zero %rdi */
xorq %rbp, %rbp /* clear framepointer */
- CCALL(i386_astintr) /* take the AST */
+ CCALL(ast_taken_user) /* handle all ASTs (enables interrupts, may return via continuation) */
- CLI
+ cli
+ mov %rsp, %r15 /* AST changes stack, saved state */
xorl %ecx, %ecx /* don't check if we're in the PFZ */
jmp EXT(return_from_trap) /* and check again (rare) */
* Trap from kernel mode. No need to switch stacks.
* Interrupts must be off here - we will set them to state at time of trap
* as soon as it's safe for us to do so and not recurse doing preemption
+ *
*/
-hndl_kerntrap:
trap_from_kernel:
-
- movq %rsp, %rdi /* saved state addr */
- pushq R64_RIP(%rsp) /* Simulate a CALL from fault point */
+ movq %r15, %rdi /* saved state addr */
+ pushq R64_RIP(%r15) /* Simulate a CALL from fault point */
pushq %rbp /* Extend framepointer chain */
movq %rsp, %rbp
CCALLWITHSP(kernel_trap) /* to kernel trap routine */
popq %rbp
addq $8, %rsp
+ mov %rsp, %r15 /* DTrace slides stack/saved-state */
cli
movl %gs:CPU_PENDING_AST,%eax /* get pending asts */
testl $(AST_URGENT),%eax /* any urgent preemption? */
je ret_to_kernel /* no, nothing to do */
- cmpl $(T_PREEMPT),R64_TRAPNO(%rsp)
+ cmpl $(T_PREEMPT),R64_TRAPNO(%r15)
je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */
- testl $(EFL_IF),R64_RFLAGS(%rsp) /* interrupts disabled? */
+ testl $(EFL_IF),R64_RFLAGS(%r15) /* interrupts disabled? */
je ret_to_kernel
cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
jne ret_to_kernel
testq %rcx,%rcx /* are we on the kernel stack? */
jne ret_to_kernel /* no, skip it */
- CCALL1(i386_astintr, $1) /* take the AST */
+ CCALL(ast_taken_kernel) /* take the AST */
+
+ mov %rsp, %r15 /* AST changes stack, saved state */
jmp ret_to_kernel
/*
* All interrupts on all tasks enter here with:
- * rsp-> x86_saved_state_t
+ * r15 x86_saved_state_t
+ * rsp kernel or interrupt stack
* esi cs at trap
*
+ * both rsp and r15 are 16-byte aligned
* interrupts disabled
* direction flag cleared
*/
orl $(CR0_TS),%eax /* or in TS bit */
mov %rax,%cr0 /* set cr0 */
- subq $8, %rsp /* for 16-byte stack alignment */
pushq %rcx /* save pointer to old stack */
- movq %rcx,%gs:CPU_INT_STATE /* save intr state */
+ pushq %gs:CPU_INT_STATE /* save previous intr state */
+ movq %r15,%gs:CPU_INT_STATE /* set intr state */
TIME_INT_ENTRY /* do timing */
incl %gs:CPU_PREEMPTION_LEVEL
incl %gs:CPU_INTERRUPT_LEVEL
- movq %gs:CPU_INT_STATE, %rdi
-
- CCALL(interrupt) /* call generic interrupt routine */
-
- cli /* just in case we returned with intrs enabled */
- xor %rax,%rax
- movq %rax,%gs:CPU_INT_STATE /* clear intr state pointer */
+ CCALL1(interrupt, %r15) /* call generic interrupt routine */
- .globl EXT(return_to_iret)
+.globl EXT(return_to_iret)
LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */
decl %gs:CPU_INTERRUPT_LEVEL
TIME_INT_EXIT /* do timing */
+ popq %gs:CPU_INT_STATE /* reset/clear intr state pointer */
+ popq %rsp /* switch back to old stack */
+
movq %gs:CPU_ACTIVE_THREAD,%rax
movq TH_PCB_FPS(%rax),%rax /* get pcb's ifps */
cmpq $0,%rax /* Is there a context */
orl $(CR0_TS),%eax /* or in TS bit */
mov %rax,%cr0 /* set cr0 */
2:
- popq %rsp /* switch back to old stack */
-
/* Load interrupted code segment into %eax */
- movl R32_CS(%rsp),%eax /* assume 32-bit state */
- cmpl $(SS_64),SS_FLAVOR(%rsp)/* 64-bit? */
+ movl R32_CS(%r15),%eax /* assume 32-bit state */
+ cmpl $(SS_64),SS_FLAVOR(%r15)/* 64-bit? */
#if DEBUG_IDT64
jne 4f
- movl R64_CS(%rsp),%eax /* 64-bit user mode */
+ movl R64_CS(%r15),%eax /* 64-bit user mode */
jmp 3f
4:
- cmpl $(SS_32),SS_FLAVOR(%rsp)
+ cmpl $(SS_32),SS_FLAVOR(%r15)
je 3f
POSTCODE2(0x6431)
- CCALL1(panic_idt64, %rsp)
+ CCALL1(panic_idt64, %r15)
hlt
#else
jne 3f
- movl R64_CS(%rsp),%eax /* 64-bit user mode */
+ movl R64_CS(%r15),%eax /* 64-bit user mode */
#endif
3:
testb $3,%al /* user mode, */
cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
jne ret_to_kernel /* yes, skip it */
- movq %gs:CPU_KERNEL_STACK,%rax
- movq %rsp,%rcx
- xorq %rax,%rcx
- andq EXT(kernel_stack_mask)(%rip),%rcx
- testq %rcx,%rcx /* are we on the kernel stack? */
- jne ret_to_kernel /* no, skip it */
-
/*
* Take an AST from kernel space. We don't need (and don't want)
* to do as much as the case where the interrupt came from user
* space.
*/
- CCALL1(i386_astintr, $1)
+ CCALL(ast_taken_kernel)
+ mov %rsp, %r15 /* AST changes stack, saved state */
jmp ret_to_kernel
incl %gs:CPU_PREEMPTION_LEVEL
incl %gs:CPU_INTERRUPT_LEVEL
incl %gs:CPU_NESTED_ISTACK
- mov %rsp, %rdi /* x86_saved_state */
- CCALL(interrupt)
+
+ push %gs:CPU_INT_STATE
+ mov %r15, %gs:CPU_INT_STATE
+
+ CCALL1(interrupt, %r15)
+
+ pop %gs:CPU_INT_STATE
decl %gs:CPU_INTERRUPT_LEVEL
decl %gs:CPU_PREEMPTION_LEVEL
decl %gs:CPU_NESTED_ISTACK
-#if DEBUG_IDT64
- CCALL1(panic_idt64, %rsp)
- POSTCODE2(0x6411)
- hlt
-#endif
+
jmp ret_to_kernel
/*
* 32bit Tasks
* System call entries via INTR_GATE or sysenter:
*
- * rsp -> x86_saved_state32_t
+ * r15 x86_saved_state32_t
+ * rsp kernel stack
+ *
+ * both rsp and r15 are 16-byte aligned
* interrupts disabled
* direction flag cleared
*/
* We can be here either for a mach syscall or a unix syscall,
* as indicated by the sign of the code:
*/
- movl R32_EAX(%rsp),%eax
+ movl R32_EAX(%r15),%eax
testl %eax,%eax
js EXT(hndl_mach_scall) /* < 0 => mach */
/* > 0 => unix */
Entry(hndl_unix_scall)
-/* If the caller (typically LibSystem) has recorded the cumulative size of
- * the arguments in EAX, copy them over from the user stack directly.
- * We recover from exceptions inline--if the copy loop doesn't complete
- * due to an exception, we fall back to copyin from compatibility mode.
- * We can potentially extend this mechanism to mach traps as well (DRK).
- */
- testl $(I386_SYSCALL_ARG_BYTES_MASK), %eax
- jz L_copy_args_continue
- movl %eax, %ecx
- mov %gs:CPU_UBER_ARG_STORE_VALID, %rbx
- shrl $(I386_SYSCALL_ARG_DWORDS_SHIFT), %ecx
- andl $(I386_SYSCALL_ARG_DWORDS_MASK), %ecx
- mov %gs:CPU_UBER_ARG_STORE, %rdi
- mov ISC32_RSP(%rsp), %rsi
- add $4, %rsi
- movl $0, (%rbx)
-
-EXT(idt64_unix_scall_copy_args):
- rep movsl
- movl $1, (%rbx)
-L_copy_args_continue:
TIME_TRAP_UENTRY
- movq %gs:CPU_KERNEL_STACK,%rdi
- xchgq %rdi,%rsp /* switch to kernel stack */
movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
movq TH_TASK(%rcx),%rbx /* point to current task */
incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
sti
- CCALL(unix_syscall)
+ CCALL1(unix_syscall, %r15)
/*
* always returns through thread_exception_return
*/
Entry(hndl_mach_scall)
TIME_TRAP_UENTRY
- movq %gs:CPU_KERNEL_STACK,%rdi
- xchgq %rdi,%rsp /* switch to kernel stack */
movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
movq TH_TASK(%rcx),%rbx /* point to current task */
incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
sti
- CCALL(mach_call_munger)
+ CCALL1(mach_call_munger, %r15)
/*
* always returns through thread_exception_return
*/
Entry(hndl_mdep_scall)
TIME_TRAP_UENTRY
- movq %gs:CPU_KERNEL_STACK,%rdi
- xchgq %rdi,%rsp /* switch to kernel stack */
-
/* Check for active vtimers in the current task */
movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
movq TH_TASK(%rcx),%rbx /* point to current task */
sti
- CCALL(machdep_syscall)
+ CCALL1(machdep_syscall, %r15)
/*
* always returns through thread_exception_return
*/
-
-Entry(hndl_diag_scall)
- TIME_TRAP_UENTRY
-
- movq %gs:CPU_KERNEL_STACK,%rdi
- xchgq %rdi,%rsp /* switch to kernel stack */
-
- /* Check for active vtimers in the current task */
- movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
- movq TH_TASK(%rcx),%rbx /* point to current task */
- TASK_VTIMER_CHECK(%rbx,%rcx)
-
- pushq %rdi /* push pcb stack */
-
- CCALL(diagCall) // Call diagnostics
-
- cli // Disable interruptions just in case
- cmpl $0,%eax // What kind of return is this?
- je 1f // - branch if bad (zero)
- popq %rsp // Get back the pcb stack
- jmp EXT(return_to_user) // Normal return, do not check asts...
-1:
- CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
- // pass what would be the diag syscall
- // error return - cause an exception
- /* no return */
-
-
/*
* 64bit Tasks
* System call entries via syscall only:
*
- * rsp -> x86_saved_state64_t
+ * r15 x86_saved_state64_t
+ * rsp kernel stack
+ *
+ * both rsp and r15 are 16-byte aligned
* interrupts disabled
* direction flag cleared
*/
Entry(hndl_syscall)
TIME_TRAP_UENTRY
- movq %gs:CPU_KERNEL_STACK,%rdi
- xchgq %rdi,%rsp /* switch to kernel stack */
movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
+ movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling syscall */
movq TH_TASK(%rcx),%rbx /* point to current task */
/* Check for active vtimers in the current task */
* We can be here either for a mach, unix machdep or diag syscall,
* as indicated by the syscall class:
*/
- movl R64_RAX(%rdi), %eax /* syscall number/class */
+ movl R64_RAX(%r15), %eax /* syscall number/class */
movl %eax, %edx
andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */
cmpl $(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
je EXT(hndl_diag_scall64)
/* Syscall class unknown */
+ sti
CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
/* no return */
incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
sti
- CCALL(unix_syscall64)
+ CCALL1(unix_syscall64, %r15)
/*
* always returns through thread_exception_return
*/
incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
sti
- CCALL(mach_call_munger64)
+ CCALL1(mach_call_munger64, %r15)
/*
* always returns through thread_exception_return
*/
Entry(hndl_mdep_scall64)
sti
- CCALL(machdep_syscall64)
+ CCALL1(machdep_syscall64, %r15)
/*
* always returns through thread_exception_return
*/
-
Entry(hndl_diag_scall64)
- pushq %rdi // Push the previous stack
- CCALL(diagCall64) // Call diagnostics
- cli // Disable interruptions just in case
- cmpl $0,%eax // What kind of return is this?
+ CCALL1(diagCall64, %r15) // Call diagnostics
+ test %eax, %eax // What kind of return is this?
je 1f // - branch if bad (zero)
- popq %rsp // Get back the pcb stack
jmp EXT(return_to_user) // Normal return, do not check asts...
1:
+ sti
CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
/* no return */
-
+/* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
Entry(hndl_machine_check)
- CCALL1(panic_machine_check64, %rsp)
+ /* Adjust SP and savearea to their canonical, non-aliased addresses */
+ subq EXT(dblmap_dist)(%rip), %rsp
+ subq EXT(dblmap_dist)(%rip), %r15
+ CCALL1(panic_machine_check64, %r15)
hlt
Entry(hndl_double_fault)
- CCALL1(panic_double_fault64, %rsp)
+ subq EXT(dblmap_dist)(%rip), %rsp
+ subq EXT(dblmap_dist)(%rip), %r15
+ CCALL1(panic_double_fault64, %r15)
hlt