2 * Copyright (c) 2010 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 #include <i386/eflags.h>
31 #include <i386/rtclock_asm.h>
32 #include <i386/trap.h>
33 #define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */
34 #include <mach/i386/syscall_sw.h>
35 #include <i386/postcode.h>
36 #include <i386/proc_reg.h>
37 #include <mach/exception_types.h>
44 * This is the low-level trap and interrupt handling code associated with
45 * the IDT. It also includes system call handlers for sysenter/syscall.
46 * The IDT itself is defined in mp_desc.c.
48 * Code here is structured as follows:
50 * stubs Code called directly from an IDT vector.
51 * All entry points have the "idt64_" prefix and they are built
52 * using macros expanded by the inclusion of idt_table.h.
53 * This code performs vector-dependent identification and jumps
54 * into the dispatch code.
56 * dispatch The dispatch code is responsible for saving the thread state
57 * (which is either 64-bit or 32-bit) and then jumping to the
58 * class handler identified by the stub.
60 * returns Code to restore state and return to the previous context.
62 * handlers There are several classes of handlers:
63 * interrupt - asynchronous events typically from external devices
64 * trap - synchronous events due to thread execution
65 * syscall - synchronous system call request
/*
 * Short aliases for the class handlers. Each one expands to EXT() applied
 * to the handler's symbol name; the stubs below hand these to
 * PUSH_FUNCTION / IDT_ENTRY_WRAPPER.
 */
72 #define HNDL_ALLINTRS EXT(hndl_allintrs)
73 #define HNDL_ALLTRAPS EXT(hndl_alltraps)
74 #define HNDL_SYSENTER EXT(hndl_sysenter)
75 #define HNDL_SYSCALL EXT(hndl_syscall)
76 #define HNDL_UNIX_SCALL EXT(hndl_unix_scall)
77 #define HNDL_MACH_SCALL EXT(hndl_mach_scall)
78 #define HNDL_MDEP_SCALL EXT(hndl_mdep_scall)
79 #define HNDL_DOUBLE_FAULT EXT(hndl_double_fault)
80 #define HNDL_MACHINE_CHECK EXT(hndl_machine_check)
/*
 * PUSH_FUNCTION(func): used by the entry stubs to put the address of a
 * handler on the stack. Two definitions appear here: a multi-instruction
 * form that computes the address RIP-relatively into %rax, and a
 * single-instruction form that is a plain pushq of func.
 * NOTE(review): confirm which build condition selects each form.
 */
84 #define PUSH_FUNCTION(func) \
87 leaq func(%rip), %rax ;\
91 #define PUSH_FUNCTION(func) pushq func
94 /* The wrapper for all non-special traps/interrupts */
95 /* Everything up to PUSH_FUNCTION is just to output
96 * the interrupt number out to the postcode display
99 #define IDT_ENTRY_WRAPPER(n, f) \
101 POSTCODE2(0x6400+n) ;\
107 #define IDT_ENTRY_WRAPPER(n, f) \
113 /* A trap that comes with an error code already on the stack */
/* TRAP_ERR(n, f): stub for vector n, routed to the generic trap handler. */
114 #define TRAP_ERR(n, f) \
116 IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)
122 IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* USER_TRAP stubs are generated exactly like TRAP stubs. */
124 #define USER_TRAP TRAP

/*
 * INTERRUPT(n): emits the entry point _intr_<n> and routes vector n to the
 * generic interrupt handler through the common wrapper.
 */
127 #define INTERRUPT(n) \
128 Entry(_intr_ ## n) ;\
130 IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)
132 /* A trap with a special-case handler, hence we don't need to define anything */
/*
 * The three macros below expand to nothing, so including idt_table.h
 * generates no stub for entries declared with them.
 */
133 #define TRAP_SPC(n, f)
134 #define TRAP_IST(n, f)
135 #define USER_TRAP_SPC(n, f)
137 /* Generate all the stubs */
138 #include "idt_table.h"
141 * Common dispatch point.
142 * Determine what mode has been interrupted and save state accordingly.
/* Compare the code-segment selector saved in the interrupt frame with the 64-bit kernel selector. */
145 cmpl $(KERNEL64_CS), ISF64_CS(%rsp)
151 * Check for trap from EFI32, and restore cr3 and rsp if so.
152 * A trap from EFI32 is fatal.
/* Any selector other than KERNEL32_CS skips the EFI32 recovery below. */
154 cmpl $(KERNEL32_CS), ISF64_CS(%rsp)
155 jne L_dispatch_continue
157 mov EXT(pal_efi_saved_cr3)(%rip), %rcx
160 shr $32, %rcx /* splice the upper 32-bits of rip */
161 shl $32, %rsp /* .. and the lower 32-bits of rsp */
162 shrd $32, %rcx, %rsp /* to recover the full 64-bits of rsp */
/*
 * Choose the state-save path from the per-cpu task map: a 32-bit task goes
 * to L_32bit_dispatch, anything else falls through to the 64-bit path.
 */
166 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
167 je L_32bit_dispatch /* 32-bit user task */
168 /* fall through to 64bit user dispatch */
171 * Here for 64-bit user task or kernel
/* Make room for the 64-bit saved-state area and tag its flavor as SS_64. */
174 subq $(ISS64_OFFSET), %rsp
175 movl $(SS_64), SS_FLAVOR(%rsp)
180 * Save segment regs - for completeness since they're not used.
/* Spill the interrupted context into the 64-bit saved-state frame addressed by %rsp. */
182 movl %fs, R64_FS(%rsp)
183 movl %gs, R64_GS(%rsp)
185 /* Save general-purpose registers */
186 mov %rax, R64_RAX(%rsp)
187 mov %rcx, R64_RCX(%rsp)
188 mov %rbx, R64_RBX(%rsp)
189 mov %rbp, R64_RBP(%rsp)
190 mov %r11, R64_R11(%rsp)
191 mov %r12, R64_R12(%rsp)
192 mov %r13, R64_R13(%rsp)
193 mov %r14, R64_R14(%rsp)
194 mov %r15, R64_R15(%rsp)
196 /* cr2 is significant only for page-faults */
198 mov %rax, R64_CR2(%rsp)
200 /* Other registers (which may contain syscall args) */
201 mov %rdi, R64_RDI(%rsp) /* arg0 .. */
202 mov %rsi, R64_RSI(%rsp)
203 mov %rdx, R64_RDX(%rsp)
204 mov %r10, R64_R10(%rsp)
205 mov %r8, R64_R8(%rsp)
206 mov %r9, R64_R9(%rsp) /* .. arg5 */
/*
 * Load the three values the common dispatch code consumes:
 * trap number (%ebx), handler address (%rdx) and interrupted cs (%esi).
 */
208 mov R64_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */
209 mov R64_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */
210 mov R64_CS(%rsp), %esi /* %esi := cs for later */
212 jmp L_common_dispatch
214 L_64bit_entry_reject:
216 * Here for a 64-bit user attempting an invalid kernel entry.
/*
 * Re-target the frame at the generic trap handler ...
 * NOTE(review): the +8 bias on ISF64_TRAPFN implies one extra quadword is
 * on the stack at this point, presumably a temporarily saved %rax - confirm.
 */
219 leaq HNDL_ALLTRAPS(%rip), %rax
220 movq %rax, ISF64_TRAPFN+8(%rsp)
/* ... and report the attempt as an invalid-opcode trap. */
222 movq $(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp)
227 * Check we're not a confused 64-bit user.
/*
 * Gate for the 32-bit entry stubs (they jump to L_32bit_entry_check):
 * unless the per-cpu task map says 32-bit, the entry is rejected.
 */
229 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
230 jne L_64bit_entry_reject
231 /* fall through to 32-bit handler: */
/*
 * State save for a 32-bit user task. Builds the compatibility-mode
 * saved-state frame below the interrupt stack frame and leaves
 * %esi = cs, %ebx = trapno, %rdx = trapfn for the common dispatch code.
 */
233 L_32bit_dispatch: /* 32-bit user task */
/* Make room for the compat save area and tag its flavor as SS_32. */
234 subq $(ISC32_OFFSET), %rsp
235 movl $(SS_32), SS_FLAVOR(%rsp)
/* Save the data segment selectors. */
241 movl %ds, R32_DS(%rsp)
242 movl %es, R32_ES(%rsp)
243 movl %fs, R32_FS(%rsp)
244 movl %gs, R32_GS(%rsp)
247 * Save general 32-bit registers
249 mov %eax, R32_EAX(%rsp)
250 mov %ebx, R32_EBX(%rsp)
251 mov %ecx, R32_ECX(%rsp)
252 mov %edx, R32_EDX(%rsp)
253 mov %ebp, R32_EBP(%rsp)
254 mov %esi, R32_ESI(%rsp)
255 mov %edi, R32_EDI(%rsp)
257 /* Unconditionally save cr2; only meaningful on page faults */
259 mov %eax, R32_CR2(%rsp)
262 * Copy registers already saved in the machine state
263 * (in the interrupt stack frame) into the compat save area.
265 mov ISC32_RIP(%rsp), %eax
266 mov %eax, R32_EIP(%rsp)
267 mov ISC32_RFLAGS(%rsp), %eax
268 mov %eax, R32_EFLAGS(%rsp)
269 mov ISC32_RSP(%rsp), %eax
270 mov %eax, R32_UESP(%rsp)
271 mov ISC32_SS(%rsp), %eax
272 mov %eax, R32_SS(%rsp)
/*
 * Secondary entry: L_32bit_fault_set_seg jumps here after storing
 * trapno/trapfn/err into the ISC32 slots; only cs, trapno and err are
 * (re)copied into the compat save area from this point.
 */
273 L_32bit_dispatch_after_fault:
274 mov ISC32_CS(%rsp), %esi /* %esi := %cs for later */
275 mov %esi, R32_CS(%rsp)
276 mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */
277 mov %ebx, R32_TRAPNO(%rsp)
278 mov ISC32_ERR(%rsp), %eax
279 mov %eax, R32_ERR(%rsp)
280 mov ISC32_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */
284 * On entering the kernel, we don't need to switch cr3
285 * because the kernel shares the user's address space.
286 * But we mark the kernel's cr3 as "active".
287 * If, however, the invalid cr3 flag is set, we have to flush tlbs
288 * since the kernel's mapping was changed while we were in userspace.
290 * But: if global no_shared_cr3 is TRUE we do switch to the kernel's cr3
291 * so that illicit accesses to userspace can be trapped.
/*
 * Common tail of both state-save paths. On arrival %esi holds the
 * interrupted cs, %ebx the trap number and %rdx the handler address
 * (loaded by the 32-bit and 64-bit dispatch code).
 * First record the kernel's cr3 as the active one for this cpu.
 */
293 mov %gs:CPU_KERNEL_CR3, %rcx
294 mov %rcx, %gs:CPU_ACTIVE_CR3
/* The low two bits of cs are zero when the kernel itself was interrupted. */
295 test $3, %esi /* user/kernel? */
296 jz 1f /* skip cr3 reload from kernel */
298 cmpl $0, EXT(no_shared_cr3)(%rip)
300 mov %rcx, %cr3 /* load kernel cr3 */
301 jmp 2f /* and skip tlb flush test */
/* Load 8 bytes starting 4 bytes past CPU_ACTIVE_CR3; CPU_TLB_INVALID is cleared below. */
303 mov %gs:CPU_ACTIVE_CR3+4, %rcx
307 movl $0, %gs:CPU_TLB_INVALID
308 testl $(1<<16), %ecx /* Global? */
310 mov %cr4, %rcx /* RMWW CR4, for lack of an alternative*/
311 and $(~CR4_PGE), %rcx
/* Debug-register handling keyed on the thread's TH_PCB_IDS pointer. */
320 mov %gs:CPU_ACTIVE_THREAD, %rcx /* Get the active thread */
321 cmpq $0, TH_PCB_IDS(%rcx) /* Is there a debug register state? */
323 xor %ecx, %ecx /* If so, reset DR7 (the control) */
/* hwIntCnt is indexed by trap number (%ebx) with 4-byte counters. */
326 incl %gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count
327 /* Dispatch the designated handler */
331 * Control is passed here to return to user.
333 Entry(return_to_user)
/*
 * %rsp addresses the thread's saved-state frame here (its SS_FLAVOR field
 * is tested at the end of this block to choose the restore path).
 */
337 // XXX 'Be nice to tidy up this debug register restore sequence...
338 mov %gs:CPU_ACTIVE_THREAD, %rdx
339 movq TH_PCB_IDS(%rdx),%rax /* Obtain this thread's debug state */
341 test %rax, %rax /* Is there a debug register context? */
342 je 2f /* branch if not */
343 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
345 movl DS_DR0(%rax), %ecx /* If so, load the 32 bit DRs */
347 movl DS_DR1(%rax), %ecx
349 movl DS_DR2(%rax), %ecx
351 movl DS_DR3(%rax), %ecx
353 movl DS_DR7(%rax), %ecx
/* The DR7 value is only staged in the per-cpu slot here; %dr7 itself is written further down. */
354 movq %rcx, %gs:CPU_DR7
357 mov DS64_DR0(%rax), %rcx /* Load the full width DRs*/
359 mov DS64_DR1(%rax), %rcx
361 mov DS64_DR2(%rax), %rcx
363 mov DS64_DR3(%rax), %rcx
365 mov DS64_DR7(%rax), %rcx
366 mov %rcx, %gs:CPU_DR7
369 * On exiting the kernel there's no need to switch cr3 since we're
370 * already running in the user's address space which includes the
371 * kernel. Nevertheless, we now mark the task's cr3 as active.
372 * But, if no_shared_cr3 is set, we do need to switch cr3 at this point.
374 mov %gs:CPU_TASK_CR3, %rcx
375 mov %rcx, %gs:CPU_ACTIVE_CR3
376 movl EXT(no_shared_cr3)(%rip), %eax
377 test %eax, %eax /* -no_shared_cr3 */
/* Apply the staged debug-control value. */
381 mov %gs:CPU_DR7, %rax /* Is there a debug control register?*/
384 mov %rax, %dr7 /* Set DR7 */
/*
 * Pick the restore path from the saved-state flavor. The panic_idt64 call
 * sits just before an #endif annotated DEBUG_IDT64.
 */
387 cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */
392 cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? */
396 CCALL1(panic_idt64, %rsp)
398 #endif /* DEBUG_IDT64 */
401 * Restore registers into the machine state for iret.
/*
 * 32-bit return, part 1: copy eip/eflags/cs/uesp/ss from the compat save
 * area back into the interrupt-frame slots (the mirror image of the copy
 * made in L_32bit_dispatch), using %eax as scratch.
 */
403 movl R32_EIP(%rsp), %eax
404 movl %eax, ISC32_RIP(%rsp)
405 movl R32_EFLAGS(%rsp), %eax
406 movl %eax, ISC32_RFLAGS(%rsp)
407 movl R32_CS(%rsp), %eax
408 movl %eax, ISC32_CS(%rsp)
409 movl R32_UESP(%rsp), %eax
410 movl %eax, ISC32_RSP(%rsp)
411 movl R32_SS(%rsp), %eax
412 movl %eax, ISC32_SS(%rsp)
415 * Restore general 32-bit registers
/* %eax is reloaded first: it was the scratch register for the copies above. */
417 movl R32_EAX(%rsp), %eax
418 movl R32_EBX(%rsp), %ebx
419 movl R32_ECX(%rsp), %ecx
420 movl R32_EDX(%rsp), %edx
421 movl R32_EBP(%rsp), %ebp
422 movl R32_ESI(%rsp), %esi
423 movl R32_EDI(%rsp), %edi
426 * Restore segment registers. We may take an exception here but
427 * we've got enough space left in the save frame area to absorb
428 * a hardware frame plus the trapfn and trapno
/*
 * 32-bit return, part 2: reload the data segment selectors from the save
 * area. NOTE(review): these four loads are presumably the instructions
 * named ret32_set_ds/es/fs/gs that trap_check_kernel_exit tests for - confirm.
 */
432 movl R32_DS(%rsp), %ds
434 movl R32_ES(%rsp), %es
436 movl R32_FS(%rsp), %fs
438 movl R32_GS(%rsp), %gs
440 /* pop compat frame + trapno, trapfn and error */
441 add $(ISC32_OFFSET)+8+8+8, %rsp
/*
 * ISF64_CS is biased by -24 because %rsp has just been advanced past the
 * trapno, trapfn and error slots. SYSENTER_CS is the value the sysenter
 * entry stub pushes as cs, so a match identifies a sysenter-built frame.
 */
442 cmpl $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
443 /* test for fast entry/exit */
446 iretq /* return from interrupt */
/*
 * Fast exit: unpack the frame by hand. The return eip ends up in %rdx and
 * the user stack pointer in %rcx, matching the register convention in the
 * sysenter entry notes. IF is cleared in the flags image before popf, and
 * interrupts are re-enabled with sti immediately before the sysexit.
 */
449 pop %rdx /* user return eip */
450 pop %rcx /* pop and toss cs */
451 andl $(~EFL_IF), (%rsp) /* clear interrupts enable, sti below */
452 popf /* flags - carry denotes failure */
453 pop %rcx /* user return esp */
454 sti /* interrupts enabled after sysexit */
455 .byte 0x0f,0x35 /* 32-bit sysexit */
/*
 * 64-bit return path. The two panic_idt64 calls follow consistency checks
 * on the saved-state flavor and on the saved cs.
 */
459 cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */
463 CCALL1(panic_idt64, %rsp)
466 cmpl $(KERNEL64_CS), R64_CS(%rsp)
468 CCALL1(panic_idt64, %rsp)
474 testb $3, R64_CS(%rsp) /* returning to user-space? */
480 * Restore general 64-bit registers
482 mov R64_R15(%rsp), %r15
483 mov R64_R14(%rsp), %r14
484 mov R64_R13(%rsp), %r13
485 mov R64_R12(%rsp), %r12
486 mov R64_R11(%rsp), %r11
487 mov R64_R10(%rsp), %r10
488 mov R64_R9(%rsp), %r9
489 mov R64_R8(%rsp), %r8
490 mov R64_RSI(%rsp), %rsi
491 mov R64_RDI(%rsp), %rdi
492 mov R64_RBP(%rsp), %rbp
493 mov R64_RDX(%rsp), %rdx
494 mov R64_RBX(%rsp), %rbx
495 mov R64_RCX(%rsp), %rcx
496 mov R64_RAX(%rsp), %rax
498 add $(ISS64_OFFSET)+24, %rsp /* pop saved state frame +
499 trapno + trapfn and error */
/*
 * SYSCALL_CS is the pseudo-selector the syscall entry code stores in the
 * frame, so a match identifies a frame built for the syscall instruction.
 * The -24 bias accounts for the three slots just popped.
 */
500 cmpl $(SYSCALL_CS),ISF64_CS-24(%rsp)
501 /* test for fast entry/exit */
505 iretq /* return from interrupt */
509 * Here to load rcx/r11/rsp and perform the sysret back to user-space.
512 * rsp user stack pointer
/*
 * Return rip goes in %rcx and rflags in %r11; %rsp is loaded last because
 * the frame is still being addressed through it.
 */
514 mov ISF64_RIP-24(%rsp), %rcx
515 mov ISF64_RFLAGS-24(%rsp), %r11
516 mov ISF64_RSP-24(%rsp), %rsp
517 sysretq /* return from system call */
522 * System call handlers.
523 * These are entered via a syscall interrupt. The system call number in %rax
524 * is saved to the error code slot in the stack frame. We then branch to the
525 * common state saving code.
529 #error NO UNIX INT!!!
/*
 * Stub for the unix system-call vector: switches to the kernel gs, saves
 * the call number (%rax) in the frame, pushes the class handler address
 * and joins the 32-bit entry check.
 */
531 Entry(idt64_unix_scall)
532 swapgs /* switch to kernel gs (cpu_data) */
533 pushq %rax /* save system call number */
534 PUSH_FUNCTION(HNDL_UNIX_SCALL)
536 jmp L_32bit_entry_check
/*
 * Stub for the mach system-call vector: same shape as idt64_unix_scall,
 * but the handler pushed is HNDL_MACH_SCALL.
 */
539 Entry(idt64_mach_scall)
540 swapgs /* switch to kernel gs (cpu_data) */
541 pushq %rax /* save system call number */
542 PUSH_FUNCTION(HNDL_MACH_SCALL)
544 jmp L_32bit_entry_check
/*
 * Stub for the machine-dependent system-call vector: same shape as
 * idt64_unix_scall, but the handler pushed is HNDL_MDEP_SCALL.
 */
547 Entry(idt64_mdep_scall)
548 swapgs /* switch to kernel gs (cpu_data) */
549 pushq %rax /* save system call number */
550 PUSH_FUNCTION(HNDL_MDEP_SCALL)
552 jmp L_32bit_entry_check
/*
 * Entry for the syscall instruction. On arrival the user rip is in %rcx
 * and the user rflags in %r11 (both are stored into the frame below), and
 * %rsp is still the user stack, so the interrupt-style frame is built by
 * hand in the per-cpu CPU_UBER_ISF area.
 */
557 swapgs /* Kapow! get per-cpu data area */
558 mov %rsp, %gs:CPU_UBER_TMP /* save user stack */
559 mov %gs:CPU_UBER_ISF, %rsp /* switch stack to pcb */
562 * Save values in the ISF frame in the PCB
563 * to cons up the saved machine state.
565 movl $(USER_DS), ISF64_SS(%rsp)
566 movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */
567 mov %r11, ISF64_RFLAGS(%rsp) /* rflags */
568 mov %rcx, ISF64_RIP(%rsp) /* rip */
/* %rcx is free once rip is stored; reuse it to move the user %rsp into the frame. */
569 mov %gs:CPU_UBER_TMP, %rcx
570 mov %rcx, ISF64_RSP(%rsp) /* user stack */
571 mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */
572 movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */
/* %r11 is borrowed to form the handler address, then reloaded from the frame. */
573 leaq HNDL_SYSCALL(%rip), %r11;
574 movq %r11, ISF64_TRAPFN(%rsp)
575 mov ISF64_RFLAGS(%rsp), %r11 /* Avoid info leak,restore R11 */
576 jmp L_64bit_dispatch /* this can only be a 64-bit task */
579 * sysenter entry point
580 * Requires user code to set up:
581 * edx: user instruction pointer (return address)
582 * ecx: user stack pointer
583 * on which is pushed stub ret addr and saved ebx
584 * Return to user-space is made using sysexit.
585 * Note: sysenter/sysexit cannot be used for calls returning a value in edx,
586 * or requiring ecx to be preserved.
/*
 * Entry for the sysenter instruction. The frame is assembled with explicit
 * pushes (ss, ..., cs, ..., err, trapfn) before joining the 32-bit entry
 * check.
 */
589 Entry(idt64_sysenter)
592 * Push values on to the PCB stack
593 * to cons up the saved machine state.
595 push $(USER_DS) /* ss */
599 * Clear, among others, the Nested Task (NT) flags bit;
600 * this is zeroed by INT, but not by SYSENTER.
604 push $(SYSENTER_CS) /* cs */
606 swapgs /* switch to kernel gs (cpu_data) */
608 push %rax /* err/eax - syscall code */
609 PUSH_FUNCTION(HNDL_SYSENTER)
/* Force the interrupt-enable bit on in the rflags image saved in the frame. */
611 orl $(EFL_IF), ISF64_RFLAGS(%rsp)
612 jmp L_32bit_entry_check
/*
 * Page-fault stub. Frame offsets carry an extra 8 while the temporary
 * %rax is on the stack. Three outcomes:
 *  - faulting rip == idt64_unix_scall_copy_args: drop the fault frame and
 *    resume system call entry at L_copy_args_continue;
 *  - fault taken in kernel mode: go to L_kernel_trap;
 *  - otherwise: restore %rax and continue at L_dispatch_continue.
 */
615 Entry(idt64_page_fault)
616 PUSH_FUNCTION(HNDL_ALLTRAPS)
618 push %rax /* save %rax temporarily */
619 leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax
620 cmp %rax, 8+ISF64_RIP(%rsp) /* fault during copy args? */
621 je 1f /* - yes, handle copy arg fault */
622 testb $3, 8+ISF64_CS(%rsp) /* was trap from kernel? */
623 jz L_kernel_trap /* - yes, handle with care */
624 pop %rax /* restore %rax, swapgs, and continue */
626 jmp L_dispatch_continue
628 add $(8+ISF64_SIZE), %rsp /* remove entire intr stack frame */
629 jmp L_copy_args_continue /* continue system call entry */
633 * Debug trap. Check for single-stepping across system call into
634 * kernel. If this is the case, taking the debug trap has turned
635 * off single-stepping - save the flags register with the trace
/* A zero is pushed into the error-code slot, followed by the handler address. */
639 push $0 /* error code */
640 PUSH_FUNCTION(HNDL_ALLTRAPS)
643 testb $3, ISF64_CS(%rsp)
647 * trap came from kernel mode
/* Was the trap taken exactly at the sysenter entry point (saved rip == idt64_sysenter)? */
650 push %rax /* save %rax temporarily */
651 lea EXT(idt64_sysenter)(%rip), %rax
652 cmp %rax, ISF64_RIP+8(%rsp)
656 * Interrupt stack frame has been pushed on the temporary stack.
657 * We have to switch to pcb stack and patch up the saved state.
659 mov %rcx, ISF64_ERR(%rsp) /* save %rcx in error slot */
660 mov ISF64_SS+8(%rsp), %rcx /* top of temp stack -> pcb stack */
661 xchg %rcx,%rsp /* switch to pcb stack */
/*
 * After the exchange %rcx addresses the old (temporary) frame. Rebuild
 * ss/rsp/rflags/cs by hand on the pcb stack. SYSENTER_TF_CS is pushed
 * instead of SYSENTER_CS; the 32-bit return path takes its fast exit only
 * when it finds SYSENTER_CS in the frame.
 */
662 push $(USER_DS) /* ss */
663 push ISF64_ERR(%rcx) /* saved %rcx into rsp slot */
664 push ISF64_RFLAGS(%rcx) /* rflags */
665 push $(SYSENTER_TF_CS) /* cs - not SYSENTER_CS for iret path */
666 mov ISF64_ERR(%rcx),%rcx /* restore %rcx */
667 jmp L_sysenter_continue /* continue sysenter entry */
/*
 * Double-fault stub: pushes the double-fault handler and trap number, then
 * checks whether the fault hit exactly at the syscall entry point
 * (saved rip == idt64_syscall).
 */
670 Entry(idt64_double_fault)
671 PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
672 pushq $(T_DOUBLE_FAULT)
675 leaq EXT(idt64_syscall)(%rip), %rax
676 cmp %rax, ISF64_RIP+8(%rsp)
/* Reload %rsp from the stack pointer recorded in the fault frame and resume syscall entry. */
680 mov ISF64_RSP(%rsp), %rsp
681 jmp L_syscall_continue
685 * General protection or segment-not-present fault.
686 * Check for a GP/NP fault in the kernel_return
687 * sequence; if there, report it as a GP/NP fault on the user's instruction.
689 * rsp-> 0 ISF64_TRAPNO: trap code (NP or GP)
690 * 8 ISF64_TRAPFN: trap function
691 * 16 ISF64_ERR: segment number in error (error code)
694 * 40 ISF64_RFLAGS: rflags
697 * 64: old registers (trap is from kernel)
/* General-protection stub: push the trap handler and T_GENERAL_PROTECTION, then run the kernel-exit check. */
699 Entry(idt64_gen_prot)
700 PUSH_FUNCTION(HNDL_ALLTRAPS)
701 pushq $(T_GENERAL_PROTECTION)
702 jmp trap_check_kernel_exit /* check for kernel exit sequence */
/* Stack-fault stub: push the trap handler and T_STACK_FAULT, then run the kernel-exit check. */
704 Entry(idt64_stack_fault)
705 PUSH_FUNCTION(HNDL_ALLTRAPS)
706 pushq $(T_STACK_FAULT)
707 jmp trap_check_kernel_exit /* check for kernel exit sequence */
/* Segment-not-present stub body; it runs straight into the shared check below. */
710 PUSH_FUNCTION(HNDL_ALLTRAPS)
711 pushq $(T_SEGMENT_NOT_PRESENT)
712 /* indicate fault type */
/*
 * Shared by the GP, stack-fault and segment-not-present stubs. The
 * privilege bits of the saved cs are tested first.
 */
713 trap_check_kernel_exit:
714 testb $3,ISF64_CS(%rsp)
717 * trap was from kernel mode,
718 * so check for the kernel exit sequence
/*
 * Compare the faulting rip with each known faulting point in turn.
 * The 8+ bias on the frame offsets allows for one extra quadword on the
 * stack (a saved %rax, per the comments in the targets of these branches).
 */
722 leaq EXT(ret32_iret)(%rip), %rax
723 cmp %rax, 8+ISF64_RIP(%rsp)
725 leaq EXT(ret64_iret)(%rip), %rax
726 cmp %rax, 8+ISF64_RIP(%rsp)
/* A fault at any of the four segment-register reloads goes to L_32bit_fault_set_seg. */
728 leaq EXT(ret32_set_ds)(%rip), %rax
729 cmp %rax, 8+ISF64_RIP(%rsp)
730 je L_32bit_fault_set_seg
731 leaq EXT(ret32_set_es)(%rip), %rax
732 cmp %rax, 8+ISF64_RIP(%rsp)
733 je L_32bit_fault_set_seg
734 leaq EXT(ret32_set_fs)(%rip), %rax
735 cmp %rax, 8+ISF64_RIP(%rsp)
736 je L_32bit_fault_set_seg
737 leaq EXT(ret32_set_gs)(%rip), %rax
738 cmp %rax, 8+ISF64_RIP(%rsp)
739 je L_32bit_fault_set_seg
/*
 * Fault in the syscall argument copy: cmove does not alter the flags, so
 * the je still tests the compare above; on a match %rsp is first reset
 * from the frame's saved rsp.
 */
741 leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax
742 cmp %rax, 8+ISF64_RIP(%rsp)
743 cmove 8+ISF64_RSP(%rsp), %rsp
744 je L_copy_args_continue
750 * Here after taking an unexpected trap from kernel mode - perhaps
751 * while running in the trampolines hereabouts.
752 * Note: %rax has been pushed on stack.
753 * Make sure we're not on the PCB stack, if so move to the kernel stack.
754 * This is likely a fatal condition.
755 * But first, try to ensure we have the kernel gs base active...
/*
 * Decide which gs base is live from the sign of the value read at
 * %gs:CPU_THIS: negative means a kernel address, so no swapgs is needed.
 */
757 movq %gs:CPU_THIS, %rax /* get gs_base into %rax */
758 test %rax, %rax /* test sign bit (MSB) */
759 js 1f /* -ve kernel addr, no swap */
760 swapgs /* +ve user addr, swap */
762 movq %gs:CPU_UBER_ISF, %rax /* PCB stack addr */
/* Unsigned compare against PAGE_SIZE: "below" is treated as being on the PCB stack. */
764 cmpq $(PAGE_SIZE), %rax /* current stack in PCB? */
765 jb 2f /* - yes, deal with it */
766 pop %rax /* - no, restore %rax */
770 * Here if %rsp is in the PCB
771 * Copy the interrupt stack frame from PCB stack to kernel stack
773 movq %gs:CPU_KERNEL_STACK, %rax
/*
 * The pushes run from the highest frame slot (ss) down to the lowest
 * (trapno), so the copy has the same layout as the source frame; the 8+
 * bias skips the saved %rax.
 * NOTE(review): %rax must address the old frame for these operands to be
 * right - confirm how it is set up between the load above and the pushes.
 */
775 pushq 8+ISF64_SS(%rax)
776 pushq 8+ISF64_RSP(%rax)
777 pushq 8+ISF64_RFLAGS(%rax)
778 pushq 8+ISF64_CS(%rax)
779 pushq 8+ISF64_RIP(%rax)
780 pushq 8+ISF64_ERR(%rax)
781 pushq 8+ISF64_TRAPFN(%rax)
782 pushq 8+ISF64_TRAPNO(%rax)
787 * GP/NP fault on IRET: CS or SS is in error.
788 * Note that the user ss is originally 16-byte aligned; we had popped the
789 * stack back to contain just the rip/cs/rflags/rsp/ss before issuing the iret.
790 * On taking the GP/NP fault on the iret instruction, the stack is aligned to
791 * 16 bytes before the interrupt frame is pushed. Hence, 8 bytes of padding exist.
794 * (- rax saved above, which is immediately popped)
795 * 0 ISF64_TRAPNO: trap code (NP or GP)
796 * 8 ISF64_TRAPFN: trap function
797 * 16 ISF64_ERR: segment number in error (error code)
800 * 40 ISF64_RFLAGS: rflags
801 * 48 ISF64_RSP: rsp <-- new trapno
802 * 56 ISF64_SS: ss <-- new trapfn
803 * 64 pad8 <-- new errcode
808 * 104 user ss (16-byte aligned)
/*
 * Fault on iret: move trapno/trapfn/err up into the rsp/ss/pad slots of
 * the fault frame so they sit directly beneath the user frame that the
 * iret was consuming. The saved-rip slot is used as scratch for %rax.
 */
811 pop %rax /* recover saved %rax */
812 mov %rax, ISF64_RIP(%rsp) /* save rax (we don't need saved rip) */
813 mov ISF64_TRAPNO(%rsp), %rax
814 mov %rax, ISF64_RSP(%rsp) /* put in user trap number */
815 mov ISF64_TRAPFN(%rsp), %rax
816 mov %rax, ISF64_SS(%rsp) /* put in user trap function */
817 mov ISF64_ERR(%rsp), %rax /* get error code */
818 mov %rax, 8+ISF64_SS(%rsp) /* put in user errcode */
819 mov ISF64_RIP(%rsp), %rax /* restore rax */
/* Advancing %rsp by ISF64_RSP makes the relocated trapno the new top of stack. */
820 add $(ISF64_RSP),%rsp /* reset to new trapfn */
821 /* now treat as fault from user */
821 /* now treat as fault from user */
825 * Fault restoring a segment register. All of the saved state is still
826 * on the stack untouched since we haven't yet moved the stack pointer.
/*
 * Reached from trap_check_kernel_exit when the fault hit one of the
 * segment-register reloads on the 32-bit return path.
 */
828 L_32bit_fault_set_seg:
830 pop %rax /* toss saved %rax from stack */
/* Pick trapno/trapfn/err out of the fault frame before abandoning it. */
831 mov ISF64_TRAPNO(%rsp), %rax
832 mov ISF64_TRAPFN(%rsp), %rcx
833 mov ISF64_ERR(%rsp), %rdx
834 mov ISF64_RSP(%rsp), %rsp /* reset stack to saved state */
/* %rsp now addresses the original compat frame; plant the new trap details in it. */
835 mov %rax,ISC32_TRAPNO(%rsp)
836 mov %rcx,ISC32_TRAPFN(%rsp)
837 mov %rdx,ISC32_ERR(%rsp)
838 /* now treat as fault from user */
839 /* except that all the state is */
840 /* already saved - we just have to */
841 /* move the trapno and error into */
842 /* the compatibility frame */
843 jmp L_32bit_dispatch_after_fault
847 * Fatal exception handlers:
/* Double fault reported through the double-fault handler with trap number T_DOUBLE_FAULT. */
849 Entry(idt64_db_task_dbl_fault)
850 PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
851 pushq $(T_DOUBLE_FAULT)
/* Stack fault variant: same handler, but the trap number is T_STACK_FAULT. */
854 Entry(idt64_db_task_stk_fault)
855 PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
856 pushq $(T_STACK_FAULT)
/* Machine check: a zero error code is pushed first, then the machine-check handler and trap number. */
860 push $(0) /* Error */
861 PUSH_FUNCTION(HNDL_MACHINE_CHECK)
862 pushq $(T_MACHINE_CHECK)
866 /* All 'exceptions' enter hndl_alltraps:
867 * rsp -> x86_saved_state_t
870 * The rest of the state is set up as:
871 * interrupts disabled
872 * direction flag cleared
881 /* Check for active vtimers in the current task */
/* %rcx = current thread, %rbx = its task: the operands TASK_VTIMER_CHECK is invoked with. */
882 mov %gs:CPU_ACTIVE_THREAD, %rcx
883 mov TH_TASK(%rcx), %rbx
884 TASK_VTIMER_CHECK(%rbx, %rcx)
/* The saved-state pointer (the current %rsp) becomes arg0 before %rsp is moved to the kernel stack. */
886 movq %rsp, %rdi /* also pass it as arg0 */
887 movq %gs:CPU_KERNEL_STACK,%rsp /* switch to kernel stack */
889 CCALL(user_trap) /* call user trap routine */
890 /* user_trap() unmasks interrupts */
891 cli /* hold off intrs - critical section */
/* %ecx == 0 tells the AST path that follows not to run the PFZ check. */
892 xorl %ecx, %ecx /* don't check if we're in the PFZ */
/*
 * Common exit after a user trap or system call. %rsp is reloaded from the
 * current thread's TH_PCB_ISS (via %rsp itself as scratch), then the
 * pending-AST word decides between a direct return_to_user and AST
 * servicing.
 */
897 Entry(return_from_trap)
898 movq %gs:CPU_ACTIVE_THREAD,%rsp
899 movq TH_PCB_ISS(%rsp), %rsp /* switch back to PCB stack */
900 movl %gs:CPU_PENDING_AST,%eax
902 je EXT(return_to_user) /* branch if no AST */
/*
 * AST servicing on the way back to user mode.
 * In: %ecx != 0 requests the PFZ check (ast_from_interrupt_user passes 1,
 *     the trap paths pass 0).
 * %r13 is used as the saved-state pointer throughout.
 * NOTE(review): confirm where %r13 is loaded on entry to this path.
 */
904 L_return_from_trap_with_ast:
906 movq %gs:CPU_KERNEL_STACK, %rsp
908 testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */
909 je 2f /* no, go handle the AST */
910 cmpl $(SS_64), SS_FLAVOR(%r13) /* are we a 64-bit task? */
912 /* no... 32-bit user mode */
/* 32-bit: ask commpage_is_in_pfz32 about the saved eip. */
913 movl R32_EIP(%r13), %edi
914 xorq %rbp, %rbp /* clear framepointer */
915 CCALL(commpage_is_in_pfz32)
917 je 2f /* not in the PFZ... go service AST */
/* In the PFZ: store the call's result in the saved ebx and return without taking the AST. */
918 movl %eax, R32_EBX(%r13) /* let the PFZ know we've pended an AST */
919 movq %r13, %rsp /* switch back to PCB stack */
920 jmp EXT(return_to_user)
/* 64-bit: same check using the saved rip and commpage_is_in_pfz64. */
922 movq R64_RIP(%r13), %rdi
923 xorq %rbp, %rbp /* clear framepointer */
924 CCALL(commpage_is_in_pfz64)
926 je 2f /* not in the PFZ... go service AST */
927 movl %eax, R64_RBX(%r13) /* let the PFZ know we've pended an AST */
928 movq %r13, %rsp /* switch back to PCB stack */
929 jmp EXT(return_to_user)
/* Take the AST with interrupts enabled and a zero argument, then re-run the exit checks. */
931 STI /* interrupts always enabled on return to user mode */
933 xor %edi, %edi /* zero %rdi */
934 xorq %rbp, %rbp /* clear framepointer */
935 CCALL(i386_astintr) /* take the AST */
938 xorl %ecx, %ecx /* don't check if we're in the PFZ */
939 jmp EXT(return_from_trap) /* and check again (rare) */
942 * Trap from kernel mode. No need to switch stacks.
943 * Interrupts must be off here - we will set them to state at time of trap
944 * as soon as it's safe for us to do so and not recurse doing preemption
/*
 * The saved-state address is passed in %rdi. The pushed rip and %rbp make
 * the interrupted code look like the caller of the trap routine.
 */
949 movq %rsp, %rdi /* saved state addr */
950 pushq R64_RIP(%rsp) /* Simulate a CALL from fault point */
951 pushq %rbp /* Extend framepointer chain */
953 CCALLWITHSP(kernel_trap) /* to kernel trap routine */
/*
 * After kernel_trap: an urgent AST is considered only when the trap was
 * not T_PREEMPT; the remaining tests examine the saved IF flag, the
 * preemption level and whether we are on the kernel stack.
 */
958 movl %gs:CPU_PENDING_AST,%eax /* get pending asts */
959 testl $(AST_URGENT),%eax /* any urgent preemption? */
960 je ret_to_kernel /* no, nothing to do */
961 cmpl $(T_PREEMPT),R64_TRAPNO(%rsp)
962 je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */
963 testl $(EFL_IF),R64_RFLAGS(%rsp) /* interrupts disabled? */
965 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
967 movq %gs:CPU_KERNEL_STACK,%rax
970 andq EXT(kernel_stack_mask)(%rip),%rcx
971 testq %rcx,%rcx /* are we on the kernel stack? */
972 jne ret_to_kernel /* no, skip it */
974 CCALL1(i386_astintr, $1) /* take the AST */
979 * All interrupts on all tasks enter here with:
980 * rsp-> x86_saved_state_t
983 * interrupts disabled
984 * direction flag cleared
988 * test whether already on interrupt stack
/*
 * %rcx = top of this cpu's interrupt stack, %rdx = top - INTSTACK_SIZE:
 * the bounds used for the "already on the interrupt stack?" test.
 */
990 movq %gs:CPU_INT_STACK_TOP,%rcx
993 leaq -INTSTACK_SIZE(%rcx),%rdx
/* After the exchange %rcx holds the interrupted stack pointer, i.e. the saved-state address. */
997 xchgq %rcx,%rsp /* switch to interrupt stack */
999 mov %cr0,%rax /* get cr0 */
1000 orl $(CR0_TS),%eax /* or in TS bit */
1001 mov %rax,%cr0 /* set cr0 */
1003 subq $8, %rsp /* for 16-byte stack alignment */
1004 pushq %rcx /* save pointer to old stack */
1005 movq %rcx,%gs:CPU_INT_STATE /* save intr state */
1007 TIME_INT_ENTRY /* do timing */
1009 /* Check for active vtimers in the current task */
1010 mov %gs:CPU_ACTIVE_THREAD, %rcx
1011 mov TH_TASK(%rcx), %rbx
1012 TASK_VTIMER_CHECK(%rbx, %rcx)
/* Both levels are decremented again at return_to_iret. */
1014 incl %gs:CPU_PREEMPTION_LEVEL
1015 incl %gs:CPU_INTERRUPT_LEVEL
/* The saved-state pointer stashed in CPU_INT_STATE is loaded into %rdi before calling interrupt. */
1017 movq %gs:CPU_INT_STATE, %rdi
1019 CCALL(interrupt) /* call generic interrupt routine */
1021 cli /* just in case we returned with intrs enabled */
1023 movq %rax,%gs:CPU_INT_STATE /* clear intr state pointer */
/*
 * Interrupt exit. Undoes the level increments made on entry, handles the
 * CR0.TS bit, switches back to the interrupted stack and then chooses
 * between the user AST path and the kernel preemption check.
 */
1025 .globl EXT(return_to_iret)
1026 LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */
1028 decl %gs:CPU_INTERRUPT_LEVEL
1029 decl %gs:CPU_PREEMPTION_LEVEL
1031 TIME_INT_EXIT /* do timing */
/*
 * Look at the thread's saved FP context (TH_PCB_FPS) and its FP_VALID
 * field; both branches to 1f skip ahead past the next instructions.
 */
1033 movq %gs:CPU_ACTIVE_THREAD,%rax
1034 movq TH_PCB_FPS(%rax),%rax /* get pcb's ifps */
1035 cmpq $0,%rax /* Is there a context */
1036 je 1f /* Branch if not */
1037 movl FP_VALID(%rax),%eax /* Load fp_valid */
1038 cmpl $0,%eax /* Check if valid */
1039 jne 1f /* Branch if valid */
1043 mov %cr0,%rax /* get cr0 */
1044 orl $(CR0_TS),%eax /* or in TS bit */
1045 mov %rax,%cr0 /* set cr0 */
/* Pops the old stack pointer that the interrupt entry code pushed. */
1047 popq %rsp /* switch back to old stack */
1049 /* Load interrupted code segment into %eax */
1050 movl R32_CS(%rsp),%eax /* assume 32-bit state */
1051 cmpl $(SS_64),SS_FLAVOR(%rsp)/* 64-bit? */
1054 movl R64_CS(%rsp),%eax /* 64-bit user mode */
1057 cmpl $(SS_32),SS_FLAVOR(%rsp)
1060 CCALL1(panic_idt64, %rsp)
1064 movl R64_CS(%rsp),%eax /* 64-bit user mode */
/* Non-zero privilege bits in the interrupted cs mean the interrupt came from user mode. */
1067 testb $3,%al /* user mode, */
1068 jnz ast_from_interrupt_user /* go handle potential ASTs */
1070 * we only want to handle preemption requests if
1071 * the interrupt fell in the kernel context
1072 * and preemption isn't disabled
1074 movl %gs:CPU_PENDING_AST,%eax
1075 testl $(AST_URGENT),%eax /* any urgent requests? */
1076 je ret_to_kernel /* no, nothing to do */
1078 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
1079 jne ret_to_kernel /* yes, skip it */
1081 movq %gs:CPU_KERNEL_STACK,%rax
1084 andq EXT(kernel_stack_mask)(%rip),%rcx
1085 testq %rcx,%rcx /* are we on the kernel stack? */
1086 jne ret_to_kernel /* no, skip it */
1089 * Take an AST from kernel space. We don't need (and don't want)
1090 * to do as much as the case where the interrupt came from user
1093 CCALL1(i386_astintr, $1)
1099 * nested int - simple path, can't preempt etc on way out
/*
 * Nested-interrupt path: the per-cpu preemption, interrupt and
 * nested-istack counters are raised, the current %rsp is loaded into %rdi
 * as the saved-state pointer (no stack switch), and the same three
 * counters are lowered again afterwards.
 */
1102 incl %gs:CPU_PREEMPTION_LEVEL
1103 incl %gs:CPU_INTERRUPT_LEVEL
1104 incl %gs:CPU_NESTED_ISTACK
1105 mov %rsp, %rdi /* x86_saved_state */
1108 decl %gs:CPU_INTERRUPT_LEVEL
1109 decl %gs:CPU_PREEMPTION_LEVEL
1110 decl %gs:CPU_NESTED_ISTACK
1112 CCALL1(panic_idt64, %rsp)
1119 * Take an AST from an interrupted user
/*
 * Interrupt taken in user mode: with no pending AST go straight to
 * ret_to_user; otherwise join the trap AST path with %ecx = 1 so that it
 * performs the PFZ check.
 */
1121 ast_from_interrupt_user:
1122 movl %gs:CPU_PENDING_AST,%eax
1123 testl %eax,%eax /* pending ASTs? */
1124 je EXT(ret_to_user) /* no, nothing to do */
1128 movl $1, %ecx /* check if we're in the PFZ */
1129 jmp L_return_from_trap_with_ast /* return */
1132 /* Syscall dispatch routines! */
1137 * System call entries via INTR_GATE or sysenter:
1139 * rsp -> x86_saved_state32_t
1140 * interrupts disabled
1141 * direction flag cleared
/*
 * Class handler for sysenter entries. Dispatches on the sign of the call
 * number held in the saved eax: negative goes to hndl_mach_scall,
 * otherwise execution continues with the code that follows.
 */
1144 Entry(hndl_sysenter)
1146 * We can be here either for a mach syscall or a unix syscall,
1147 * as indicated by the sign of the code:
1149 movl R32_EAX(%rsp),%eax
1151 js EXT(hndl_mach_scall) /* < 0 => mach */
1154 Entry(hndl_unix_scall)
/*
 * 32-bit unix system call handler. The idt64_unix_scall_copy_args label
 * below marks the point the page-fault and GP/NP stubs compare a faulting
 * rip against; on a match they resume at L_copy_args_continue. After the
 * xchgq further down, %rdi holds the saved-state pointer and %rsp the
 * kernel stack.
 */
1155 /* If the caller (typically LibSystem) has recorded the cumulative size of
1156 * the arguments in EAX, copy them over from the user stack directly.
1157 * We recover from exceptions inline--if the copy loop doesn't complete
1158 * due to an exception, we fall back to copyin from compatibility mode.
1159 * We can potentially extend this mechanism to mach traps as well (DRK).
1161 testl $(I386_SYSCALL_ARG_BYTES_MASK), %eax
1162 jz L_copy_args_continue
1164 mov %gs:CPU_UBER_ARG_STORE_VALID, %rbx
1165 shrl $(I386_SYSCALL_ARG_DWORDS_SHIFT), %ecx
1166 andl $(I386_SYSCALL_ARG_DWORDS_MASK), %ecx
1167 mov %gs:CPU_UBER_ARG_STORE, %rdi
1168 mov ISC32_RSP(%rsp), %rsi
1172 EXT(idt64_unix_scall_copy_args):
1175 L_copy_args_continue:
1179 movq %gs:CPU_KERNEL_STACK,%rdi
1180 xchgq %rdi,%rsp /* switch to kernel stack */
1181 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1182 movq TH_TASK(%rcx),%rbx /* point to current task */
1183 incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
1185 /* Check for active vtimers in the current task */
1186 TASK_VTIMER_CHECK(%rbx,%rcx)
1192 * always returns through thread_exception_return
/*
 * 32-bit mach system call handler. Switches to the kernel stack (leaving
 * the saved-state pointer in %rdi), counts the call against the current
 * thread, runs the vtimer check and calls mach_call_munger.
 */
1196 Entry(hndl_mach_scall)
1199 movq %gs:CPU_KERNEL_STACK,%rdi
1200 xchgq %rdi,%rsp /* switch to kernel stack */
1201 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1202 movq TH_TASK(%rcx),%rbx /* point to current task */
1203 incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
1205 /* Check for active vtimers in the current task */
1206 TASK_VTIMER_CHECK(%rbx,%rcx)
1210 CCALL(mach_call_munger)
1212 * always returns through thread_exception_return
/*
 * 32-bit machine-dependent system call handler. Switches to the kernel
 * stack (leaving the saved-state pointer in %rdi), runs the vtimer check
 * and calls machdep_syscall. No per-thread call counter is incremented
 * on this path.
 */
1216 Entry(hndl_mdep_scall)
1219 movq %gs:CPU_KERNEL_STACK,%rdi
1220 xchgq %rdi,%rsp /* switch to kernel stack */
1222 /* Check for active vtimers in the current task */
1223 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1224 movq TH_TASK(%rcx),%rbx /* point to current task */
1225 TASK_VTIMER_CHECK(%rbx,%rcx)
1229 CCALL(machdep_syscall)
1231 * always returns through thread_exception_return
1236 * System call entries via syscall only:
1238 * rsp -> x86_saved_state64_t
1239 * interrupts disabled
1240 * direction flag cleared
/*
 * After the exchange %rdi holds the saved-state pointer and %rsp the
 * kernel stack; %rcx/%rbx are left holding the current thread and task
 * for the class handlers below.
 */
1246 movq %gs:CPU_KERNEL_STACK,%rdi
1247 xchgq %rdi,%rsp /* switch to kernel stack */
1248 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1249 movq TH_TASK(%rcx),%rbx /* point to current task */
1251 /* Check for active vtimers in the current task */
1252 TASK_VTIMER_CHECK(%rbx,%rcx)
1255 * We can be here either for a mach, unix machdep or diag syscall,
1256 * as indicated by the syscall class:
1258 movl R64_RAX(%rdi), %eax /* syscall number/class */
/* %edx is masked down to the class bits and compared against each known class in turn. */
1260 andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */
1261 cmpl $(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
1262 je EXT(hndl_mach_scall64)
1263 cmpl $(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
1264 je EXT(hndl_unix_scall64)
1265 cmpl $(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
1266 je EXT(hndl_mdep_scall64)
1267 cmpl $(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
1268 je EXT(hndl_diag_scall64)
1270 /* Syscall class unknown */
/* No class matched: raise EXC_SYSCALL, passing %rax as the code. */
1272 CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
/*
 * 64-bit unix class: count the call against the thread in %rcx (set up by
 * the syscall dispatch code) and call unix_syscall64.
 */
1276 Entry(hndl_unix_scall64)
1277 incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
1280 CCALL(unix_syscall64)
1282 * always returns through thread_exception_return
/*
 * 64-bit mach class: count the call against the thread in %rcx (set up by
 * the syscall dispatch code) and call mach_call_munger64.
 */
1286 Entry(hndl_mach_scall64)
1287 incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
1290 CCALL(mach_call_munger64)
1292 * always returns through thread_exception_return
/* 64-bit machine-dependent class: calls machdep_syscall64. */
1297 Entry(hndl_mdep_scall64)
1300 CCALL(machdep_syscall64)
1302 * always returns through thread_exception_return
/*
 * 64-bit diagnostics class. %rdi (the pcb stack pointer left there by the
 * syscall dispatch code's xchgq) is parked on the kernel stack across the
 * call to diagCall64. A non-zero result returns directly to user mode
 * without AST processing.
 * NOTE(review): the final i386_exception call (EXC_SYSCALL, code 0x6000)
 * is presumably the target of the "je 1f" taken on a zero result - confirm.
 */
1305 Entry(hndl_diag_scall64)
1306 pushq %rdi // Push the previous stack
1307 CCALL(diagCall64) // Call diagnostics
1308 cli // Disable interrupts just in case
1309 test %eax, %eax // What kind of return is this?
1310 je 1f // - branch if bad (zero)
1311 popq %rsp // Get back the pcb stack
1312 jmp EXT(return_to_user) // Normal return, do not check asts...
1315 CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
/* Machine-check class handler: passes the current %rsp as the sole argument to panic_machine_check64. */
1318 Entry(hndl_machine_check)
1319 CCALL1(panic_machine_check64, %rsp)
/* Double-fault class handler: passes the current %rsp as the sole argument to panic_double_fault64. */
1322 Entry(hndl_double_fault)
1323 CCALL1(panic_double_fault64, %rsp)