1 /*
2 * Copyright (c) 2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <i386/asm.h>
29 #include <assym.s>
30 #include <debug.h>
31 #include <i386/eflags.h>
32 #include <i386/rtclock_asm.h>
33 #include <i386/trap.h>
34 #define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */
35 #include <mach/i386/syscall_sw.h>
36 #include <i386/postcode.h>
37 #include <i386/proc_reg.h>
38 #include <mach/exception_types.h>
39
40 #if DEBUG
41 #define DEBUG_IDT64 1
42 #endif
43
44 /*
45 * This is the low-level trap and interrupt handling code associated with
46 * the IDT. It also includes system call handlers for sysenter/syscall.
47 * The IDT itself is defined in mp_desc.c.
48 *
49 * Code here is structured as follows:
50 *
51 * stubs Code called directly from an IDT vector.
52 * All entry points have the "idt64_" prefix and they are built
53 * using macros expanded by the inclusion of idt_table.h.
54 * This code performs vector-dependent identification and jumps
55 * into the dispatch code.
56 *
57 * dispatch The dispatch code is responsible for saving the thread state
58 * (which is either 64-bit or 32-bit) and then jumping to the
59 * class handler identified by the stub.
60 *
61 * returns Code to restore state and return to the previous context.
62 *
63 * handlers There are several classes of handlers:
64 * interrupt - asynchronous events typically from external devices
65 * trap - synchronous events due to thread execution
66 * syscall - synchronous system call request
67 * fatal - fatal traps
68 */
69 /*
70 * Indices of handlers for each exception type.
71 */
72 #define HNDL_ALLINTRS 0
73 #define HNDL_ALLTRAPS 1
74 #define HNDL_SYSENTER 2
75 #define HNDL_SYSCALL 3
76 #define HNDL_UNIX_SCALL 4
77 #define HNDL_MACH_SCALL 5
78 #define HNDL_MDEP_SCALL 6
79 #define HNDL_DOUBLE_FAULT 7
80 #define HNDL_MACHINE_CHECK 8
81
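/*
 * Illustrative C-level sketch (not part of the build): each stub pushes one of
 * the HNDL_* indices above, and the common dispatch code later uses that index
 * to select an entry in idt64_hndl_table1, roughly:
 *
 *	typedef void (*hndl_fn_t)(void *saved_state);
 *	extern hndl_fn_t idt64_hndl_table1[];		// hypothetical C view of the table below
 *
 *	static void dispatch(unsigned int hndl_index, void *saved_state)
 *	{
 *		idt64_hndl_table1[hndl_index](saved_state);	// jmp *(%rax,%rdx,8)
 *	}
 */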
82 /* Begin double-mapped descriptor section */
83
84 .section __HIB, __desc
85 .globl EXT(idt64_hndl_table0)
86 EXT(idt64_hndl_table0):
87 .quad EXT(ks_dispatch)
88 .quad EXT(ks_64bit_return)
89 .quad 0 /* Populated with CPU shadow displacement */
90 .quad EXT(ks_return)
91
92 EXT(idt64_hndl_table1):
93 .quad EXT(hndl_allintrs)
94 .quad EXT(hndl_alltraps)
95 .quad EXT(hndl_sysenter)
96 .quad EXT(hndl_syscall)
97 .quad EXT(hndl_unix_scall)
98 .quad EXT(hndl_mach_scall)
99 .quad EXT(hndl_mdep_scall)
100 .quad EXT(hndl_double_fault)
101 .quad EXT(hndl_machine_check)
102 .text
103
104
105 /* The wrapper for all non-special traps/interrupts */
106 /* Everything before the trapfn/trapno pushes is just to output
107 * the interrupt number to the postcode display
108 */
109 #if DEBUG_IDT64
110 #define IDT_ENTRY_WRAPPER(n, f) \
111 push %rax ;\
112 POSTCODE2(0x6400+n) ;\
113 pop %rax ;\
114 pushq $(f) ;\
115 pushq $(n) ;\
116 jmp L_dispatch
117 #else
118 #define IDT_ENTRY_WRAPPER(n, f) \
119 pushq $(f) ;\
120 pushq $(n) ;\
121 jmp L_dispatch
122 #endif
123
124 /* A trap that comes with an error code already on the stack */
125 #define TRAP_ERR(n, f) \
126 Entry(f) ;\
127 IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)
128
129 /* A normal trap */
130 #define TRAP(n, f) \
131 Entry(f) ;\
132 pushq $0 ;\
133 IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)
134
135 #define USER_TRAP TRAP
136
137 /* An interrupt */
138 #define INTERRUPT(n) \
139 Entry(_intr_ ## n) ;\
140 pushq $0 ;\
141 IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)
142
143 /* A trap with a special-case handler, hence we don't need to define anything */
144 #define TRAP_SPC(n, f)
145 #define TRAP_IST1(n, f)
146 #define TRAP_IST2(n, f)
147 #define USER_TRAP_SPC(n, f)
148
149 /* Begin double-mapped text section */
150 .section __HIB, __text
151 /* Generate all the stubs */
152 #include "idt_table.h"
153
154 Entry(idt64_page_fault)
155 pushq $(HNDL_ALLTRAPS)
156 push $(T_PAGE_FAULT)
157 jmp L_dispatch
158
159 /*
160 * #DB handler, which runs on IST1, will treat as spurious any #DB received while executing in the
161 * kernel but not on the kernel's gsbase.
162 */
163 Entry(idt64_debug)
164 /* Synthesize common interrupt stack frame */
165 push $0 /* error code */
166 pushq $(HNDL_ALLTRAPS)
167 pushq $(T_DEBUG)
168 /* Spill prior to RDMSR */
169 push %rax
170 push %rcx
171 push %rdx
172 mov $(MSR_IA32_GS_BASE), %ecx
173 rdmsr /* Check contents of GSBASE MSR */
174 test $0x80000000, %edx /* MSB set? Already swapped to kernel's */
175 jnz 1f
176
177 /*
178 * If we're not already swapped to the kernel's gsbase AND this #DB originated from kernel space,
179 * it must have happened within the very small window on entry or exit before or after (respectively)
180 * swapgs occurred. In those cases, consider the #DB spurious and immediately return.
181 */
182 testb $3, 8+8+8+ISF64_CS(%rsp)
183 jnz 2f
184 pop %rdx
185 pop %rcx
186 pop %rax
187 addq $0x18, %rsp /* Remove synthesized interrupt stack frame */
188 jmp EXT(ret64_iret)
189 2:
190 swapgs /* direct from user */
191 1:
192 pop %rdx
193
194 leaq EXT(idt64_hndl_table0)(%rip), %rax
195 mov 16(%rax), %rax /* Offset of per-CPU shadow */
196 mov %gs:CPU_TASK_CR3(%rax), %rax
197 mov %rax, %cr3
198
199 pop %rcx
200
201 /* Note that %rax will be popped from the stack in ks_dispatch, below */
202
203 leaq EXT(idt64_hndl_table0)(%rip), %rax
204 jmp *(%rax)
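/*
 * Sketch of the GSBASE test used above (illustrative; rdmsr64() is an assumed
 * helper): the kernel's gsbase is a high, canonical kernel address, so bit 63
 * of IA32_GS_BASE is set once swapgs has already switched to it. RDMSR returns
 * the value in EDX:EAX, so testing bit 31 of %edx tests bit 63 of the MSR:
 *
 *	uint64_t gsbase = rdmsr64(MSR_IA32_GS_BASE);
 *	boolean_t on_kernel_gs = (gsbase & (1ULL << 63)) != 0;
 *	if (!on_kernel_gs && (saved_cs & 3) == 0) {
 *		// #DB hit the tiny window around swapgs on entry/exit: spurious
 *	}
 */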
205
206 /*
207 * Legacy interrupt gate System call handlers.
208 * These are entered via a syscall interrupt. The system call number in %rax
209 * is saved to the error code slot in the stack frame. We then branch to the
210 * common state saving code.
211 */
212
213 #ifndef UNIX_INT
214 #error NO UNIX INT!!!
215 #endif
216 Entry(idt64_unix_scall)
217 pushq %rax /* save system call number */
218 pushq $(HNDL_UNIX_SCALL)
219 pushq $(UNIX_INT)
220 jmp L_dispatch
221
222 Entry(idt64_mach_scall)
223 pushq %rax /* save system call number */
224 pushq $(HNDL_MACH_SCALL)
225 pushq $(MACH_INT)
226 jmp L_dispatch
227
228 Entry(idt64_mdep_scall)
229 pushq %rax /* save system call number */
230 pushq $(HNDL_MDEP_SCALL)
231 pushq $(MACHDEP_INT)
232 jmp L_dispatch
233
234 /*
235 * For GP/NP/SS faults, we use the IST1 stack.
236 * For faults from user-space, we have to copy the machine state to the
237 * PCB stack and then dispatch as normal.
238 * For faults in kernel-space, we need to scrub for kernel exit faults and
239 * treat these as user-space faults. But for all other kernel-space faults
240 * we continue to run on the IST1 stack and we dispatch to handle the fault
241 * as fatal.
242 */
243 Entry(idt64_gen_prot)
244 pushq $(HNDL_ALLTRAPS)
245 pushq $(T_GENERAL_PROTECTION)
246 jmp L_dispatch
247
248 Entry(idt64_stack_fault)
249 pushq $(HNDL_ALLTRAPS)
250 pushq $(T_STACK_FAULT)
251 jmp L_dispatch
252
253 Entry(idt64_segnp)
254 pushq $(HNDL_ALLTRAPS)
255 pushq $(T_SEGMENT_NOT_PRESENT)
256 jmp L_dispatch
257
258 /*
259 * Fatal exception handlers:
260 */
261 Entry(idt64_db_task_dbl_fault)
262 pushq $(HNDL_DOUBLE_FAULT)
263 pushq $(T_DOUBLE_FAULT)
264 jmp L_dispatch
265
266 Entry(idt64_db_task_stk_fault)
267 pushq $(HNDL_DOUBLE_FAULT)
268 pushq $(T_STACK_FAULT)
269 jmp L_dispatch
270
271 Entry(idt64_mc)
272 push $(0) /* Error */
273 pushq $(HNDL_MACHINE_CHECK)
274 pushq $(T_MACHINE_CHECK)
275 jmp L_dispatch
276
277 /*
278 * NMI
279 * This may or may not be fatal but extreme care is required
280 * because it may arrive while control is already in another trampoline.
281 *
282 * We get here on IST2 stack which is used exclusively for NMIs.
283 * Machine checks, doublefaults and similar use IST1
284 */
285 Entry(idt64_nmi)
286 /* Synthesize common interrupt stack frame */
287 pushq $0
288 pushq $(HNDL_ALLINTRS)
289 pushq $(T_NMI)
290 /* Spill prior to RDMSR */
291 push %rax
292 push %rcx
293 push %rdx
294 mov $(MSR_IA32_GS_BASE), %ecx
295 rdmsr /* Check contents of GSBASE MSR */
296 test $0x80000000, %edx /* MSB set? Already swapped to kernel's */
297 jnz 44f
298 swapgs /* Either direct from user or within trampolines */
299 44:
300 pop %rdx
301 pop %rcx
302
303 leaq EXT(idt64_hndl_table0)(%rip), %rax
304 mov 16(%rax), %rax /* Offset of per-CPU shadow */
305 mov %gs:CPU_KERNEL_CR3(%rax), %rax
306 mov %rax, %cr3 /* Unconditionally switch to primary kernel pagetables */
307 leaq EXT(idt64_hndl_table0)(%rip), %rax
308 jmp *(%rax)
309
310 Entry(idt64_double_fault)
311 pushq $(HNDL_DOUBLE_FAULT)
312 pushq $(T_DOUBLE_FAULT)
313 jmp L_dispatch
314
315 Entry(hi64_syscall)
316 Entry(idt64_syscall)
317 swapgs
318 /* Use RAX as a temporary by shifting its contents into R11[32:63]
319 * The system call number is defined to be a 32-bit quantity, as is
320 * RFLAGS.
321 */
322 shlq $32, %rax
323 or %rax, %r11
324 .globl EXT(dblsyscall_patch_point)
325 EXT(dblsyscall_patch_point):
326 // movabsq $0x12345678ABCDEFFFULL, %rax
327 /* Generate offset to the double-mapped per-CPU data shadow
328 * into RAX
329 */
330 leaq EXT(idt64_hndl_table0)(%rip), %rax
331 mov 16(%rax), %rax
332 mov %rsp, %gs:CPU_UBER_TMP(%rax) /* save user stack */
333 mov %gs:CPU_ESTACK(%rax), %rsp /* switch stack to per-cpu estack */
334 sub $(ISF64_SIZE), %rsp
335
336 /*
337 * Synthesize an ISF frame on the exception stack
338 */
339 movl $(USER_DS), ISF64_SS(%rsp)
340 mov %rcx, ISF64_RIP(%rsp) /* rip */
341
342 mov %gs:CPU_UBER_TMP(%rax), %rcx
343 mov %rcx, ISF64_RSP(%rsp) /* user stack --changed */
344
345 mov %r11, %rax
346 shrq $32, %rax /* Restore RAX */
347 mov %r11d, %r11d /* Clear r11[32:63] */
348
349 mov %r11, ISF64_RFLAGS(%rsp) /* rflags */
350 movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */
351 mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */
352 movq $(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
353 movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */
354 swapgs
355 jmp L_dispatch /* this can only be 64-bit */
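/*
 * Sketch of the register juggling above (illustrative, not part of the build).
 * SYSCALL leaves the user RIP in %rcx and user RFLAGS in %r11; both the system
 * call number and RFLAGS are 32-bit quantities here, so %rax can be parked
 * temporarily in the upper half of %r11 and recovered before dispatch:
 *
 *	r11 |= (rax << 32);		// shlq $32, %rax ; or %rax, %r11
 *	...
 *	rax  = r11 >> 32;		// recover the syscall number
 *	r11 &= 0xffffffffULL;		// mov %r11d, %r11d clears the upper half
 */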
356
357 Entry(hi64_sysenter)
358 Entry(idt64_sysenter)
359 /* Synthesize an interrupt stack frame onto the
360 * exception stack.
361 */
362 push $(USER_DS) /* ss */
363 push %rcx /* uesp */
364 pushf /* flags */
365 /*
366 * Clear, among others, the Nested Task (NT) flags bit;
367 * this is zeroed by INT, but not by SYSENTER.
368 */
369 push $0
370 popf
371 push $(SYSENTER_CS) /* cs */
372 L_sysenter_continue:
373 push %rdx /* eip */
374 push %rax /* err/eax - syscall code */
375 pushq $(HNDL_SYSENTER)
376 pushq $(T_SYSENTER)
377 orl $(EFL_IF), ISF64_RFLAGS(%rsp)
378 jmp L_dispatch
379
380 /*
381 * Common dispatch point.
382 * Determine what mode has been interrupted and save state accordingly.
383 * Here with:
384 * rsp from user-space: interrupt state in PCB, or
385 * from kernel-space: interrupt state in kernel or interrupt stack
386 * GSBASE from user-space: pthread area, or
387 * from kernel-space: cpu_data
388 */
389
390 L_dispatch:
391 pushq %rax
392 testb $3, 8+ISF64_CS(%rsp)
393 jz 1f
394 swapgs
395 leaq EXT(idt64_hndl_table0)(%rip), %rax
396 mov 16(%rax), %rax
397
398 mov %gs:CPU_TASK_CR3(%rax), %rax
399 mov %rax, %cr3
400 #if DEBUG
401 mov %rax, %gs:CPU_ENTRY_CR3
402 #endif
403 1:
404 /* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
405 leaq EXT(idt64_hndl_table0)(%rip), %rax
406 /* Indirect branch to non-doublemapped trampolines */
407 jmp *(%rax)
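/*
 * Illustrative C-level view of L_dispatch (helper names are assumptions, not
 * part of the build):
 *
 *	if (saved_cs & 3) {			// interrupted user mode?
 *		swapgs();			// switch to the kernel gsbase
 *		// idt64_hndl_table0[2] holds the per-CPU shadow displacement; it is
 *		// added to the %gs-relative offset so the doublemapped alias of
 *		// cpu_data can be read before %cr3 has been switched.
 *		set_cr3(cpu_shadow()->cpu_task_cr3);	// task's kernel CR3
 *	}
 *	ks_dispatch();				// via idt64_hndl_table0[0]
 */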
408 /* User return: register restoration and address space switch sequence */
409 Entry(ks_64bit_return)
410 mov R64_R14(%r15), %r14
411 mov R64_R13(%r15), %r13
412 mov R64_R12(%r15), %r12
413 mov R64_R11(%r15), %r11
414 mov R64_R10(%r15), %r10
415 mov R64_R9(%r15), %r9
416 mov R64_R8(%r15), %r8
417 mov R64_RSI(%r15), %rsi
418 mov R64_RDI(%r15), %rdi
419 mov R64_RBP(%r15), %rbp
420 mov R64_RDX(%r15), %rdx
421 mov R64_RCX(%r15), %rcx
422 mov R64_RBX(%r15), %rbx
423 mov R64_RAX(%r15), %rax
424 /* Switch to per-CPU exception stack */
425 mov %gs:CPU_ESTACK, %rsp
426
427 /* Synthesize interrupt stack frame from PCB savearea to exception stack */
428 push R64_SS(%r15)
429 push R64_RSP(%r15)
430 push R64_RFLAGS(%r15)
431 push R64_CS(%r15)
432 push R64_RIP(%r15)
433
434 mov R64_R15(%r15), %r15
435 cmpq $(KERNEL64_CS), 8(%rsp)
436 jz 1f
437 /* Discover user cr3/ASID */
438 push %rax
439 mov %gs:CPU_UCR3, %rax
440 #if DEBUG
441 mov %rax, %gs:CPU_EXIT_CR3
442 #endif
443 mov %rax, %cr3
444 /* Continue execution on the shared/doublemapped trampoline */
445 pop %rax
446 swapgs
447 1:
448 cmpl $(SYSCALL_CS), 8(%rsp) /* test for exit via SYSRET */
449 je L_sysret
450 EXT(ret64_iret):
451 iretq /* return from interrupt */
452 L_sysret:
453 /*
454 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
455 * rcx user rip
456 * r11 user rflags
457 * rsp user stack pointer
458 */
459 pop %rcx
460 add $8, %rsp
461 pop %r11
462 pop %rsp
463 sysretq /* return from system call */
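/*
 * Exit-path selection sketch (illustrative): the saved CS decides how we
 * leave. A frame whose CS is the SYSCALL_CS pseudo-segment was created by the
 * SYSCALL entry point and can be unwound with the faster SYSRET; everything
 * else uses IRET:
 *
 *	if (saved_cs == SYSCALL_CS) {
 *		// rcx := user RIP, r11 := user RFLAGS, rsp := user stack
 *		sysretq();
 *	} else {
 *		iretq();
 *	}
 */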
464 /* End of double-mapped TEXT */
465 .text
466
467 Entry(ks_dispatch)
468 popq %rax
469 cmpl $(KERNEL64_CS), ISF64_CS(%rsp)
470 je EXT(ks_dispatch_kernel)
471
472 mov %rax, %gs:CPU_UBER_TMP
473 mov %gs:CPU_UBER_ISF, %rax
474 add $(ISF64_SIZE), %rax
475
476 xchg %rsp, %rax
477 /* Memory-to-memory moves (ain't x86 wonderful):
478 * Transfer the exception frame from the per-CPU exception stack to the
479 * 'PCB' stack programmed at cswitch.
480 */
481 push ISF64_SS(%rax)
482 push ISF64_RSP(%rax)
483 push ISF64_RFLAGS(%rax)
484 push ISF64_CS(%rax)
485 push ISF64_RIP(%rax)
486 push ISF64_ERR(%rax)
487 push ISF64_TRAPFN(%rax)
488 push ISF64_TRAPNO(%rax)
489 mov %gs:CPU_UBER_TMP, %rax
490 jmp EXT(ks_dispatch_user)
491
492 Entry (ks_return)
493 jmp .
494
495 Entry(ks_dispatch_user)
496 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
497 je L_dispatch_U32 /* 32-bit user task */
498
499 L_dispatch_U64:
500 subq $(ISS64_OFFSET), %rsp
501 mov %r15, R64_R15(%rsp)
502 mov %rsp, %r15
503 mov %gs:CPU_KERNEL_STACK, %rsp
504 jmp L_dispatch_64bit
505
506 Entry(ks_dispatch_kernel)
507 subq $(ISS64_OFFSET), %rsp
508 mov %r15, R64_R15(%rsp)
509 mov %rsp, %r15
510
511 /*
512 * Here for 64-bit user task or kernel
513 */
514 L_dispatch_64bit:
515 movl $(SS_64), SS_FLAVOR(%r15)
516
517 /*
518 * Save segment regs - for completeness, since they're not used.
519 */
520 movl %fs, R64_FS(%r15)
521 movl %gs, R64_GS(%r15)
522
523 /* Save general-purpose registers */
524 mov %rax, R64_RAX(%r15)
525 mov %rbx, R64_RBX(%r15)
526 mov %rcx, R64_RCX(%r15)
527 mov %rdx, R64_RDX(%r15)
528 mov %rbp, R64_RBP(%r15)
529 mov %rdi, R64_RDI(%r15)
530 mov %rsi, R64_RSI(%r15)
531 mov %r8, R64_R8(%r15)
532 mov %r9, R64_R9(%r15)
533 mov %r10, R64_R10(%r15)
534 mov %r11, R64_R11(%r15)
535 mov %r12, R64_R12(%r15)
536 mov %r13, R64_R13(%r15)
537 mov %r14, R64_R14(%r15)
538
539 /* Zero unused GPRs. BX/DX/SI are clobbered elsewhere across the exception handler, and are skipped. */
540 xor %ecx, %ecx
541 xor %edi, %edi
542 xor %r8, %r8
543 xor %r9, %r9
544 xor %r10, %r10
545 xor %r11, %r11
546 xor %r12, %r12
547 xor %r13, %r13
548 xor %r14, %r14
549
550 /* cr2 is significant only for page-faults */
551 mov %cr2, %rax
552 mov %rax, R64_CR2(%r15)
553
554 mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
555 mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */
556 mov R64_CS(%r15), %esi /* %esi := cs for later */
557
558 jmp L_common_dispatch
559
560 L_64bit_entry_reject:
561 /*
562 * Here for a 64-bit user attempting an invalid kernel entry.
563 */
564 movq $(HNDL_ALLTRAPS), ISF64_TRAPFN(%rsp)
565 movq $(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp)
566 jmp L_dispatch_U64
567
568 Entry(ks_32bit_entry_check)
569 /*
570 * Check we're not a confused 64-bit user.
571 */
572 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
573 jne L_64bit_entry_reject
574 /* fall through to 32-bit handler: */
575
576 L_dispatch_U32: /* 32-bit user task */
577 subq $(ISS64_OFFSET), %rsp
578 mov %rsp, %r15
579 mov %gs:CPU_KERNEL_STACK, %rsp
580 movl $(SS_32), SS_FLAVOR(%r15)
581
582 /*
583 * Save segment regs
584 */
585 movl %ds, R32_DS(%r15)
586 movl %es, R32_ES(%r15)
587 movl %fs, R32_FS(%r15)
588 movl %gs, R32_GS(%r15)
589
590 /*
591 * Save general 32-bit registers
592 */
593 mov %eax, R32_EAX(%r15)
594 mov %ebx, R32_EBX(%r15)
595 mov %ecx, R32_ECX(%r15)
596 mov %edx, R32_EDX(%r15)
597 mov %ebp, R32_EBP(%r15)
598 mov %esi, R32_ESI(%r15)
599 mov %edi, R32_EDI(%r15)
600
601 /* Unconditionally save cr2; only meaningful on page faults */
602 mov %cr2, %rax
603 mov %eax, R32_CR2(%r15)
604 /* Zero unused GPRs. BX/DX/SI/R15 are clobbered elsewhere across the exception handler, and are skipped. */
605 xor %ecx, %ecx
606 xor %edi, %edi
607 xor %r8, %r8
608 xor %r9, %r9
609 xor %r10, %r10
610 xor %r11, %r11
611 xor %r12, %r12
612 xor %r13, %r13
613 xor %r14, %r14
614
615 /*
616 * Copy registers already saved in the machine state
617 * (in the interrupt stack frame) into the compat save area.
618 */
619 mov R64_RIP(%r15), %eax
620 mov %eax, R32_EIP(%r15)
621 mov R64_RFLAGS(%r15), %eax
622 mov %eax, R32_EFLAGS(%r15)
623 mov R64_RSP(%r15), %eax
624 mov %eax, R32_UESP(%r15)
625 mov R64_SS(%r15), %eax
626 mov %eax, R32_SS(%r15)
627 L_dispatch_U32_after_fault:
628 mov R64_CS(%r15), %esi /* %esi := %cs for later */
629 mov %esi, R32_CS(%r15)
630 mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
631 mov %ebx, R32_TRAPNO(%r15)
632 mov R64_ERR(%r15), %eax
633 mov %eax, R32_ERR(%r15)
634 mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */
635
636 L_common_dispatch:
637 cld /* Ensure the direction flag is clear in the kernel */
638 cmpl $0, EXT(pmap_smap_enabled)(%rip)
639 je 1f
640 clac /* Clear EFLAGS.AC if SMAP is present/enabled */
641 1:
642 /*
643 * On entering the kernel, we typically don't switch CR3
644 * because the kernel shares the user's address space.
645 * But we mark the kernel's cr3 as "active" for TLB coherency evaluation
646 * If, however, the CPU's invalid TLB flag is set, we have to invalidate the TLB
647 * since the kernel pagetables were changed while we were in userspace.
648 *
649 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
650 * we switch to the kernel's address space on entry. Also,
651 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
652 * so that illicit accesses to userspace can be trapped.
653 */
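/*
 * C-level sketch of the policy described above (illustrative; names are
 * assumptions):
 *
 *	active_cr3 = kernel_cr3;			// TLB coherency bookkeeping only
 *	if (from_user && (pagezero_mapped || no_shared_cr3)) {
 *		set_cr3(kernel_cr3 | kernel_pcid);	// actually switch address space
 *	} else if (tlb_invalid) {
 *		tlb_invalid = 0;
 *		// Toggling CR4.PGE flushes the entire TLB, including global entries.
 *		set_cr4(get_cr4() & ~CR4_PGE);
 *		set_cr4(get_cr4() |  CR4_PGE);
 *	}
 */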
654 mov %gs:CPU_KERNEL_CR3, %rcx
655 mov %rcx, %gs:CPU_ACTIVE_CR3
656 test $3, %esi /* user/kernel? */
657 jz 2f /* skip cr3 reload from kernel */
658 xor %rbp, %rbp
659 cmpl $0, %gs:CPU_PAGEZERO_MAPPED
660 jnz 11f
661 cmpl $0, EXT(no_shared_cr3)(%rip)
662 je 2f
663 11:
664 xor %eax, %eax
665 movw %gs:CPU_KERNEL_PCID, %ax
666 or %rax, %rcx
667 mov %rcx, %cr3 /* load kernel cr3 */
668 jmp 4f /* and skip tlb flush test */
669 2:
670 mov %gs:CPU_ACTIVE_CR3+4, %rcx
671 shr $32, %rcx
672 testl %ecx, %ecx
673 jz 4f
674 movl $0, %gs:CPU_TLB_INVALID
675 mov %cr4, %rcx /* RMWW CR4, for lack of an alternative*/
676 and $(~CR4_PGE), %rcx
677 mov %rcx, %cr4
678 or $(CR4_PGE), %rcx
679 mov %rcx, %cr4
680 4:
681 mov %gs:CPU_ACTIVE_THREAD, %rcx /* Get the active thread */
682 testq %rcx, %rcx
683 je 5f
684 movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap */
685 cmpq $0, TH_PCB_IDS(%rcx) /* Is there a debug register state? */
686 je 5f
687 xor %ecx, %ecx /* If so, reset DR7 (the control) */
688 mov %rcx, %dr7
689 5:
690 incl %gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count
691 /* Dispatch the designated handler */
692 cmp EXT(dblmap_base)(%rip), %rsp
693 jb 66f
694 cmp EXT(dblmap_max)(%rip), %rsp
695 jge 66f
696 subq EXT(dblmap_dist)(%rip), %rsp
697 subq EXT(dblmap_dist)(%rip), %r15
698 66:
699 leaq EXT(idt64_hndl_table1)(%rip), %rax
700 jmp *(%rax, %rdx, 8)
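/*
 * Stack un-aliasing sketch (illustrative): if %rsp still points into the
 * doublemapped alias of the per-CPU stacks, slide both %rsp and the saved-state
 * pointer back to the primary mapping before calling C code:
 *
 *	if (rsp >= dblmap_base && rsp < dblmap_max) {
 *		rsp -= dblmap_dist;
 *		r15 -= dblmap_dist;	// x86_saved_state_t pointer
 *	}
 */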
701
702 /*
703 * Control is passed here to return to user.
704 */
705 Entry(return_to_user)
706 TIME_TRAP_UEXIT
707
708 Entry(ret_to_user)
709 // XXX It'd be nice to tidy up this debug register restore sequence...
710 mov %gs:CPU_ACTIVE_THREAD, %rdx
711 movq TH_PCB_IDS(%rdx),%rax /* Obtain this thread's debug state */
712
713 test %rax, %rax /* Is there a debug register context? */
714 je 2f /* branch if not */
715 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
716 jne 1f
717 movl DS_DR0(%rax), %ecx /* If so, load the 32 bit DRs */
718 movq %rcx, %dr0
719 movl DS_DR1(%rax), %ecx
720 movq %rcx, %dr1
721 movl DS_DR2(%rax), %ecx
722 movq %rcx, %dr2
723 movl DS_DR3(%rax), %ecx
724 movq %rcx, %dr3
725 movl DS_DR7(%rax), %ecx
726 movq %rcx, %gs:CPU_DR7
727 jmp 2f
728 1:
729 mov DS64_DR0(%rax), %rcx /* Load the full width DRs*/
730 mov %rcx, %dr0
731 mov DS64_DR1(%rax), %rcx
732 mov %rcx, %dr1
733 mov DS64_DR2(%rax), %rcx
734 mov %rcx, %dr2
735 mov DS64_DR3(%rax), %rcx
736 mov %rcx, %dr3
737 mov DS64_DR7(%rax), %rcx
738 mov %rcx, %gs:CPU_DR7
739 2:
740 /*
741 * On exiting the kernel there's typically no need to switch cr3 since we're
742 * already running in the user's address space which includes the
743 * kernel. We now mark the task's cr3 as active, for TLB coherency.
744 * If the target address space has a pagezero mapping present, or
745 * if no_shared_cr3 is set, we do need to switch cr3 at this point.
746 */
747 mov %gs:CPU_TASK_CR3, %rcx
748 mov %rcx, %gs:CPU_ACTIVE_CR3
749 cmpl $0, %gs:CPU_PAGEZERO_MAPPED
750 jnz L_cr3_switch_island
751 movl EXT(no_shared_cr3)(%rip), %eax
752 test %eax, %eax /* -no_shared_cr3 */
753 jnz L_cr3_switch_island
754
755 L_cr3_switch_return:
756 mov %gs:CPU_DR7, %rax /* Is there a debug control register?*/
757 cmp $0, %rax
758 je 4f
759 mov %rax, %dr7 /* Set DR7 */
760 movq $0, %gs:CPU_DR7
761 4:
762 cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
763 je L_64bit_return
764
765 L_32bit_return:
766 #if DEBUG_IDT64
767 cmpl $(SS_32), SS_FLAVOR(%r15) /* 32-bit state? */
768 je 1f
769 cli
770 POSTCODE2(0x6432)
771 CCALL1(panic_idt64, %r15)
772 1:
773 #endif /* DEBUG_IDT64 */
774
775 /*
776 * Restore registers into the machine state for iret.
777 * Here on fault stack and PCB address in R15.
778 */
779 movl R32_EIP(%r15), %eax
780 movl %eax, R64_RIP(%r15)
781 movl R32_EFLAGS(%r15), %eax
782 movl %eax, R64_RFLAGS(%r15)
783 movl R32_CS(%r15), %eax
784 movl %eax, R64_CS(%r15)
785 movl R32_UESP(%r15), %eax
786 movl %eax, R64_RSP(%r15)
787 movl R32_SS(%r15), %eax
788 movl %eax, R64_SS(%r15)
789
790 /* Validate DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
791 /* Exempt "known good" statically configured selectors, e.g. USER_DS and 0 */
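/*
 * Per-selector validation sketch (illustrative): a selector that is neither a
 * known-good constant nor valid according to LAR is replaced with USER_DS so
 * that reloading it below cannot fault:
 *
 *	if (sel != USER_DS && sel != 0 && !lar_ok(sel))	// LAR sets ZF on success
 *		sel = USER_DS;
 */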
792 cmpl $(USER_DS), R32_DS(%r15)
793 jz 22f
794 cmpl $0, R32_DS(%r15)
795 jz 22f
796 larw R32_DS(%r15), %ax
797 jz 22f
798 movl $(USER_DS), R32_DS(%r15)
799 22:
800 cmpl $(USER_DS), R32_ES(%r15)
801 jz 33f
802 cmpl $0, R32_ES(%r15)
803 jz 33f
804 larw R32_ES(%r15), %ax
805 jz 33f
806 movl $(USER_DS), R32_ES(%r15)
807 33:
808 cmpl $(USER_DS), R32_FS(%r15)
809 jz 44f
810 cmpl $0, R32_FS(%r15)
811 jz 44f
812 larw R32_FS(%r15), %ax
813 jz 44f
814 movl $(USER_DS), R32_FS(%r15)
815 44:
816 cmpl $(USER_CTHREAD), R32_GS(%r15)
817 jz 55f
818 cmpl $0, R32_GS(%r15)
819 jz 55f
820 larw R32_GS(%r15), %ax
821 jz 55f
822 movl $(USER_CTHREAD), R32_GS(%r15)
823 55:
824 /*
825 * Restore general 32-bit registers
826 */
827 movl R32_EAX(%r15), %eax
828 movl R32_EBX(%r15), %ebx
829 movl R32_ECX(%r15), %ecx
830 movl R32_EDX(%r15), %edx
831 movl R32_EBP(%r15), %ebp
832 movl R32_ESI(%r15), %esi
833 movl R32_EDI(%r15), %edi
834
835 /*
836 * Restore segment registers. A segment exception taken here will
837 * push state on the IST1 stack and will not affect the "PCB stack".
838 */
839 mov %r15, %rsp /* Set the PCB as the stack */
840 swapgs
841
842 /* Zero 64-bit-exclusive GPRs to prevent data leaks */
843 xor %r8, %r8
844 xor %r9, %r9
845 xor %r10, %r10
846 xor %r11, %r11
847 xor %r12, %r12
848 xor %r13, %r13
849 xor %r14, %r14
850 xor %r15, %r15
851
852 EXT(ret32_set_ds):
853 movw R32_DS(%rsp), %ds
854 EXT(ret32_set_es):
855 movw R32_ES(%rsp), %es
856 EXT(ret32_set_fs):
857 movw R32_FS(%rsp), %fs
858 EXT(ret32_set_gs):
859 movw R32_GS(%rsp), %gs
860
861 /* pop compat frame + trapno, trapfn and error */
862 add $(ISS64_OFFSET)+8+8+8, %rsp
863 cmpl $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
864 /* test for fast entry/exit */
865 je L_fast_exit
866 EXT(ret32_iret):
867 iretq /* return from interrupt */
868
869 L_fast_exit:
870 pop %rdx /* user return eip */
871 pop %rcx /* pop and toss cs */
872 andl $(~EFL_IF), (%rsp) /* clear interrupt-enable (IF); sti below restores it */
873 popf /* flags - carry denotes failure */
874 pop %rcx /* user return esp */
875 sti /* interrupts enabled after sysexit */
876 sysexitl /* 32-bit sysexit */
877
878 L_cr3_switch_island:
879 xor %eax, %eax
880 movw %gs:CPU_ACTIVE_PCID, %ax
881 or %rax, %rcx
882 mov %rcx, %cr3
883 jmp L_cr3_switch_return
884
885 ret_to_kernel:
886 #if DEBUG_IDT64
887 cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
888 je 1f
889 cli
890 POSTCODE2(0x6464)
891 CCALL1(panic_idt64, %r15)
892 hlt
893 1:
894 cmpl $(KERNEL64_CS), R64_CS(%r15)
895 je 2f
896 CCALL1(panic_idt64, %r15)
897 hlt
898 2:
899 #endif
900
901 L_64bit_return:
902 /*
903 * Restore general 64-bit registers.
904 * Here on fault stack and PCB address in R15.
905 */
906 leaq EXT(idt64_hndl_table0)(%rip), %rax
907 jmp *8(%rax)
908
909 Entry(ks_idt64_debug_kernel)
910 /*
911 * trap came from kernel mode
912 */
913
914 push %rax /* save %rax temporarily */
915 lea EXT(idt64_sysenter)(%rip), %rax
916 cmp %rax, ISF64_RIP+8(%rsp)
917 pop %rax
918 jne EXT(ks_dispatch_kernel)
919 /*
920 * Interrupt stack frame has been pushed on the temporary stack.
921 * We have to switch to pcb stack and patch up the saved state.
922 */
923 mov %rcx, ISF64_ERR(%rsp) /* save %rcx in error slot */
924 mov ISF64_SS+8(%rsp), %rcx /* top of temp stack -> pcb stack */
925 xchg %rcx,%rsp /* switch to pcb stack */
926 push $(USER_DS) /* ss */
927 push ISF64_ERR(%rcx) /* saved %rcx into rsp slot */
928 push ISF64_RFLAGS(%rcx) /* rflags */
929 push $(SYSENTER_TF_CS) /* cs - not SYSENTER_CS for iret path */
930 mov ISF64_ERR(%rcx),%rcx /* restore %rcx */
931 jmp L_sysenter_continue /* continue sysenter entry */
932
933 Entry(ks_trap_check_kernel_exit)
934 testb $3,ISF64_CS(%rsp)
935 jz L_kernel_gpf
936
937 /* Here for fault from user-space. Copy interrupt state to PCB. */
938 swapgs
939 push %rax
940 mov %rcx, %gs:CPU_UBER_TMP /* save user RCX */
941 mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */
942 mov ISF64_SS+8(%rsp), %rax
943 mov %rax, ISF64_SS(%rcx)
944 mov ISF64_RSP+8(%rsp), %rax
945 mov %rax, ISF64_RSP(%rcx)
946 mov ISF64_RFLAGS+8(%rsp), %rax
947 mov %rax, ISF64_RFLAGS(%rcx)
948 mov ISF64_CS+8(%rsp), %rax
949 mov %rax, ISF64_CS(%rcx)
950 mov ISF64_RIP+8(%rsp), %rax
951 mov %rax, ISF64_RIP(%rcx)
952 mov ISF64_ERR+8(%rsp), %rax
953 mov %rax, ISF64_ERR(%rcx)
954 mov ISF64_TRAPFN+8(%rsp), %rax
955 mov %rax, ISF64_TRAPFN(%rcx)
956 mov ISF64_TRAPNO+8(%rsp), %rax
957 mov %rax, ISF64_TRAPNO(%rcx)
958 pop %rax
959 mov %gs:CPU_UBER_TMP, %rsp /* user RCX into RSP */
960 xchg %rcx, %rsp /* to PCB stack with user RCX */
961 jmp EXT(ks_dispatch_user)
962
963 L_kernel_gpf:
964 /* Here for GPF from kernel_space. Check for recoverable cases. */
965 push %rax
966 leaq EXT(ret32_iret)(%rip), %rax
967 cmp %rax, 8+ISF64_RIP(%rsp)
968 je L_fault_iret
969 leaq EXT(ret64_iret)(%rip), %rax
970 cmp %rax, 8+ISF64_RIP(%rsp)
971 je L_fault_iret
972 leaq EXT(ret32_set_ds)(%rip), %rax
973 cmp %rax, 8+ISF64_RIP(%rsp)
974 je L_32bit_fault_set_seg
975 leaq EXT(ret32_set_es)(%rip), %rax
976 cmp %rax, 8+ISF64_RIP(%rsp)
977 je L_32bit_fault_set_seg
978 leaq EXT(ret32_set_fs)(%rip), %rax
979 cmp %rax, 8+ISF64_RIP(%rsp)
980 je L_32bit_fault_set_seg
981 leaq EXT(ret32_set_gs)(%rip), %rax
982 cmp %rax, 8+ISF64_RIP(%rsp)
983 je L_32bit_fault_set_seg
984 jmp EXT(ks_kernel_trap)
985 /* Fall through */
986
987 Entry(ks_kernel_trap)
988 /*
989 * Here after taking an unexpected trap from kernel mode - perhaps
990 * while running in the trampolines hereabouts.
991 * Note: %rax has been pushed on stack.
992 * Make sure we're not on the PCB stack; if so, move to the kernel stack.
993 * This is likely a fatal condition.
994 * But first, ensure we have the kernel gs base active...
995 */
996 push %rcx
997 push %rdx
998 mov $(MSR_IA32_GS_BASE), %ecx
999 rdmsr /* read kernel gsbase */
1000 test $0x80000000, %edx /* test MSB of address */
1001 jne 1f
1002 swapgs /* so swap */
1003 1:
1004 pop %rdx
1005 pop %rcx
1006
1007 movq %gs:CPU_UBER_ISF, %rax /* PCB stack addr */
1008 subq %rsp, %rax
1009 cmpq $(PAGE_SIZE), %rax /* current stack in PCB? */
1010 jb 2f /* - yes, deal with it */
1011 pop %rax /* - no, restore %rax */
1012 jmp EXT(ks_dispatch_kernel)
1013 2:
1014 /*
1015 * Here if %rsp is in the PCB
1016 * Copy the interrupt stack frame from PCB stack to kernel stack
1017 */
1018 movq %gs:CPU_KERNEL_STACK, %rax
1019 xchgq %rax, %rsp
1020 pushq 8+ISF64_SS(%rax)
1021 pushq 8+ISF64_RSP(%rax)
1022 pushq 8+ISF64_RFLAGS(%rax)
1023 pushq 8+ISF64_CS(%rax)
1024 pushq 8+ISF64_RIP(%rax)
1025 pushq 8+ISF64_ERR(%rax)
1026 pushq 8+ISF64_TRAPFN(%rax)
1027 pushq 8+ISF64_TRAPNO(%rax)
1028 movq (%rax), %rax
1029 jmp EXT(ks_dispatch_kernel)
1030
1031
1032 /*
1033 * GP/NP fault on IRET: CS or SS is in error.
1034 * User GSBASE is active.
1035 * On IST1 stack containing:
1036 * (rax saved above, which is immediately popped)
1037 * 0 ISF64_TRAPNO: trap code (NP or GP)
1038 * 8 ISF64_TRAPFN: trap function
1039 * 16 ISF64_ERR: segment number in error (error code)
1040 * 24 ISF64_RIP: kernel RIP
1041 * 32 ISF64_CS: kernel CS
1042 * 40 ISF64_RFLAGS: kernel RFLAGS
1043 * 48 ISF64_RSP: kernel RSP
1044 * 56 ISF64_SS: kernel SS
1045 * On the PCB stack, pointed to by the kernel's RSP is:
1046 * 0 user RIP
1047 * 8 user CS
1048 * 16 user RFLAGS
1049 * 24 user RSP
1050 * 32 user SS
1051 *
1052 * We need to move the kernel's TRAPNO, TRAPFN and ERR to the PCB and handle
1053 * as a user fault with:
1054 * 0 ISF64_TRAPNO: trap code (NP or GP)
1055 * 8 ISF64_TRAPFN: trap function
1056 * 16 ISF64_ERR: segment number in error (error code)
1057 * 24 user RIP
1058 * 32 user CS
1059 * 40 user RFLAGS
1060 * 48 user RSP
1061 * 56 user SS
1062 */
1063 L_fault_iret:
1064 pop %rax /* recover saved %rax */
1065 mov %rax, ISF64_RIP(%rsp) /* save rax (we don't need saved rip) */
1066 mov ISF64_RSP(%rsp), %rax
1067 xchg %rax, %rsp /* switch to PCB stack */
1068 push ISF64_ERR(%rax)
1069 push ISF64_TRAPFN(%rax)
1070 push ISF64_TRAPNO(%rax)
1071 mov ISF64_RIP(%rax), %rax /* restore rax */
1072 /* now treat as fault from user */
1073 jmp L_dispatch
1074
1075 /*
1076 * Fault restoring a segment register. All of the saved state is still
1077 * on the stack untouched since we haven't yet moved the stack pointer.
1078 * On IST1 stack containing:
1079 * (rax saved above, which is immediately popped)
1080 * 0 ISF64_TRAPNO: trap code (NP or GP)
1081 * 8 ISF64_TRAPFN: trap function
1082 * 16 ISF64_ERR: segment number in error (error code)
1083 * 24 ISF64_RIP: kernel RIP
1084 * 32 ISF64_CS: kernel CS
1085 * 40 ISF64_RFLAGS: kernel RFLAGS
1086 * 48 ISF64_RSP: kernel RSP
1087 * 56 ISF64_SS: kernel SS
1088 * On the PCB stack, pointed to by the kernel's RSP is:
1089 * 0 user trap code
1090 * 8 user trap function
1091 * 16 user err
1092 * 24 user RIP
1093 * 32 user CS
1094 * 40 user RFLAGS
1095 * 48 user RSP
1096 * 56 user SS
1097 */
1098 L_32bit_fault_set_seg:
1099 swapgs
1100 pop %rax /* toss saved %rax from stack */
1101 mov ISF64_TRAPNO(%rsp), %rax
1102 mov ISF64_TRAPFN(%rsp), %rcx
1103 mov ISF64_ERR(%rsp), %rdx
1104 mov ISF64_RSP(%rsp), %rsp /* reset stack to saved state */
1105 mov %rax,R64_TRAPNO(%rsp)
1106 mov %rcx,R64_TRAPFN(%rsp)
1107 mov %rdx,R64_ERR(%rsp)
1108 /* now treat as fault from user */
1109 /* except that all the state is */
1110 /* already saved - we just have to */
1111 /* move the trapno and error into */
1112 /* the compatibility frame */
1113 jmp L_dispatch_U32_after_fault
1114
1115
1116 Entry(ks_idt64_nmi_kernel)
1117 /* From user-space: copy interrupt state to user PCB */
1118 swapgs
1119 mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */
1120 add $(ISF64_SIZE), %rcx /* adjust to base of ISF */
1121 swapgs /* swap back for L_dispatch */
1122 jmp 4f /* Copy state to PCB */
1123
1124 1:
1125 /*
1126 * From kernel-space:
1127 * Determine whether the kernel or user GS is set.
1128 * Set the kernel and ensure that we'll swap back correctly at IRET.
1129 */
1130 mov $(MSR_IA32_GS_BASE), %ecx
1131 rdmsr /* read kernel gsbase */
1132 test $0x80000000, %edx /* test MSB of address */
1133 jne 2f
1134 swapgs /* so swap */
1135 movl $1, ISF64_CS+4(%rsp) /* and set flag in CS slot */
1136 2:
1137 /*
1138 * Determine whether we're on the kernel or interrupt stack
1139 * when the NMI hit.
1140 */
1141 mov ISF64_RSP(%rsp), %rcx
1142 mov %gs:CPU_KERNEL_STACK, %rax
1143 xor %rcx, %rax
1144 and EXT(kernel_stack_mask)(%rip), %rax
1145 test %rax, %rax /* are we on the kernel stack? */
1146 je 3f /* yes */
1147
1148 mov %gs:CPU_INT_STACK_TOP, %rax
1149 dec %rax /* intr stack top is byte above max */
1150 xor %rcx, %rax
1151 and EXT(kernel_stack_mask)(%rip), %rax
1152 test %rax, %rax /* are we on the interrupt stack? */
1153 je 3f /* yes */
1154
1155 mov %gs:CPU_KERNEL_STACK, %rcx
1156 3:
1157 /* 16-byte-align kernel/interrupt stack for state push */
1158 and $0xFFFFFFFFFFFFFFF0, %rcx
1159
1160 4:
1161 /*
1162 * Copy state from NMI stack (RSP) to the save area (RCX) which is
1163 * the PCB for user or kernel/interrupt stack from kernel.
1164 * ISF64_ERR(RSP) saved RAX
1165 * ISF64_TRAPFN(RSP) saved RCX
1166 * ISF64_TRAPNO(RSP) saved RDX
1167 */
1168 xchg %rsp, %rcx /* set for pushes */
1169 push ISF64_SS(%rcx)
1170 push ISF64_RSP(%rcx)
1171 push ISF64_RFLAGS(%rcx)
1172 push ISF64_CS(%rcx)
1173 push ISF64_RIP(%rcx)
1174 push $(0) /* error code 0 */
1175 push $(HNDL_ALLINTRS) /* trapfn allintrs */
1176 push $(T_NMI) /* trapno T_NMI */
1177 mov ISF64_ERR(%rcx), %rax
1178 mov ISF64_TRAPNO(%rcx), %rdx
1179 mov ISF64_TRAPFN(%rcx), %rcx
1180 jmp L_dispatch
1181
1182
1183 /* All 'exceptions' enter hndl_alltraps, with:
1184 * r15 x86_saved_state_t address
1185 * rsp kernel stack if user-space, otherwise interrupt or kernel stack
1186 * esi cs at trap
1187 *
1188 * The rest of the state is set up as:
1189 * both rsp and r15 are 16-byte aligned
1190 * interrupts disabled
1191 * direction flag cleared
1192 */
1193 Entry(hndl_alltraps)
1194 mov %esi, %eax
1195 testb $3, %al
1196 jz trap_from_kernel
1197
1198 TIME_TRAP_UENTRY
1199
1200 /* Check for active vtimers in the current task */
1201 mov %gs:CPU_ACTIVE_THREAD, %rcx
1202 movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap/exception */
1203 mov TH_TASK(%rcx), %rbx
1204 TASK_VTIMER_CHECK(%rbx, %rcx)
1205
1206 CCALL1(user_trap, %r15) /* call user trap routine */
1207 /* user_trap() unmasks interrupts */
1208 cli /* hold off intrs - critical section */
1209 xorl %ecx, %ecx /* don't check if we're in the PFZ */
1210
1211
1212 Entry(return_from_trap)
1213 movq %gs:CPU_ACTIVE_THREAD,%r15 /* Get current thread */
1214 movl $-1, TH_IOTIER_OVERRIDE(%r15) /* Reset IO tier override to -1 before returning to userspace */
1215 cmpl $0, TH_RWLOCK_COUNT(%r15) /* Check if current thread has pending RW locks held */
1216 jz 1f
1217 xorq %rbp, %rbp /* clear framepointer */
1218 mov %r15, %rdi /* Set RDI to current thread */
1219 CCALL(lck_rw_clear_promotions_x86) /* Clear promotions if needed */
1220 1:
1221 movq TH_PCB_ISS(%r15), %r15 /* PCB stack */
1222 movl %gs:CPU_PENDING_AST,%eax
1223 testl %eax,%eax
1224 je EXT(return_to_user) /* branch if no AST */
1225
1226 L_return_from_trap_with_ast:
1227 testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */
1228 je 2f /* no, go handle the AST */
1229 cmpl $(SS_64), SS_FLAVOR(%r15) /* are we a 64-bit task? */
1230 je 1f
1231 /* no... 32-bit user mode */
1232 movl R32_EIP(%r15), %edi
1233 xorq %rbp, %rbp /* clear framepointer */
1234 CCALL(commpage_is_in_pfz32)
1235 testl %eax, %eax
1236 je 2f /* not in the PFZ... go service AST */
1237 movl %eax, R32_EBX(%r15) /* let the PFZ know we've pended an AST */
1238 jmp EXT(return_to_user)
1239 1:
1240 movq R64_RIP(%r15), %rdi
1241 xorq %rbp, %rbp /* clear framepointer */
1242 CCALL(commpage_is_in_pfz64)
1243 testl %eax, %eax
1244 je 2f /* not in the PFZ... go service AST */
1245 movl %eax, R64_RBX(%r15) /* let the PFZ know we've pended an AST */
1246 jmp EXT(return_to_user)
1247 2:
1248
1249 xorq %rbp, %rbp /* clear framepointer */
1250 CCALL(ast_taken_user) /* handle all ASTs (enables interrupts, may return via continuation) */
1251
1252 cli
1253 mov %rsp, %r15 /* AST changes stack, saved state */
1254 xorl %ecx, %ecx /* don't check if we're in the PFZ */
1255 jmp EXT(return_from_trap) /* and check again (rare) */
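/*
 * Return-to-user AST sketch (illustrative): %ecx selects whether to honor the
 * commpage preemption-free zone (PFZ). If the user RIP lies within the PFZ the
 * AST is not serviced now; instead the PFZ is notified through RBX/EBX and the
 * thread resumes in user space:
 *
 *	if (check_pfz && (pfz = commpage_is_in_pfz64(state->rip)) != 0) {
 *		state->rbx = pfz;	// let the PFZ know we've pended an AST
 *		return_to_user();
 *	}
 *	ast_taken_user();		// handles all ASTs; may return via continuation
 */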
1256
1257 /*
1258 * Trap from kernel mode. No need to switch stacks.
1259 * Interrupts must be off here - we will set them to state at time of trap
1260 * as soon as it's safe for us to do so and not recurse doing preemption
1261 *
1262 */
1263 trap_from_kernel:
1264 movq %r15, %rdi /* saved state addr */
1265 pushq R64_RIP(%r15) /* Simulate a CALL from fault point */
1266 pushq %rbp /* Extend framepointer chain */
1267 movq %rsp, %rbp
1268 CCALLWITHSP(kernel_trap) /* to kernel trap routine */
1269 popq %rbp
1270 addq $8, %rsp
1271 mov %rsp, %r15 /* DTrace slides stack/saved-state */
1272 cli
1273
1274 movl %gs:CPU_PENDING_AST,%eax /* get pending asts */
1275 testl $(AST_URGENT),%eax /* any urgent preemption? */
1276 je ret_to_kernel /* no, nothing to do */
1277 cmpl $(T_PREEMPT),R64_TRAPNO(%r15)
1278 je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */
1279 testl $(EFL_IF),R64_RFLAGS(%r15) /* interrupts disabled? */
1280 je ret_to_kernel
1281 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
1282 jne ret_to_kernel
1283 movq %gs:CPU_KERNEL_STACK,%rax
1284 movq %rsp,%rcx
1285 xorq %rax,%rcx
1286 andq EXT(kernel_stack_mask)(%rip),%rcx
1287 testq %rcx,%rcx /* are we on the kernel stack? */
1288 jne ret_to_kernel /* no, skip it */
1289
1290 CCALL(ast_taken_kernel) /* take the AST */
1291
1292 mov %rsp, %r15 /* AST changes stack, saved state */
1293 jmp ret_to_kernel
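/*
 * Kernel preemption sketch (illustrative): after kernel_trap() returns, an
 * urgent AST is taken here only when it is safe to preempt at the point of the
 * original fault:
 *
 *	if ((pending_ast & AST_URGENT) &&
 *	    state->trapno != T_PREEMPT &&	// T_PREEMPT handled in kernel_trap()
 *	    (state->rflags & EFL_IF) &&		// interrupts were enabled at the fault
 *	    preemption_level == 0 &&
 *	    on_kernel_stack(rsp)) {
 *		ast_taken_kernel();
 *	}
 */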
1294
1295
1296 /*
1297 * All interrupts on all tasks enter here with:
1298 * r15 x86_saved_state_t
1299 * rsp kernel or interrupt stack
1300 * esi cs at trap
1301 *
1302 * both rsp and r15 are 16-byte aligned
1303 * interrupts disabled
1304 * direction flag cleared
1305 */
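/*
 * Interrupt-stack selection sketch (illustrative) for the test performed just
 * below: a nested interrupt is detected when %rsp already lies within the
 * per-CPU interrupt stack; otherwise we switch onto it:
 *
 *	if (rsp <= int_stack_top && rsp > int_stack_top - INTSTACK_SIZE)
 *		int_from_intstack();	// nested: simple path
 *	else
 *		rsp = int_stack_top;	// switch to interrupt stack
 */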
1306 Entry(hndl_allintrs)
1307 /*
1308 * test whether already on interrupt stack
1309 */
1310 movq %gs:CPU_INT_STACK_TOP,%rcx
1311 cmpq %rsp,%rcx
1312 jb 1f
1313 leaq -INTSTACK_SIZE(%rcx),%rdx
1314 cmpq %rsp,%rdx
1315 jb int_from_intstack
1316 1:
1317 xchgq %rcx,%rsp /* switch to interrupt stack */
1318
1319 mov %cr0,%rax /* get cr0 */
1320 orl $(CR0_TS),%eax /* or in TS bit */
1321 mov %rax,%cr0 /* set cr0 */
1322
1323 pushq %rcx /* save pointer to old stack */
1324 pushq %gs:CPU_INT_STATE /* save previous intr state */
1325 movq %r15,%gs:CPU_INT_STATE /* set intr state */
1326
1327 TIME_INT_ENTRY /* do timing */
1328
1329 /* Check for active vtimers in the current task */
1330 mov %gs:CPU_ACTIVE_THREAD, %rcx
1331 mov TH_TASK(%rcx), %rbx
1332 TASK_VTIMER_CHECK(%rbx, %rcx)
1333
1334 incl %gs:CPU_PREEMPTION_LEVEL
1335 incl %gs:CPU_INTERRUPT_LEVEL
1336
1337 CCALL1(interrupt, %r15) /* call generic interrupt routine */
1338
1339 .globl EXT(return_to_iret)
1340 LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */
1341
1342 decl %gs:CPU_INTERRUPT_LEVEL
1343 decl %gs:CPU_PREEMPTION_LEVEL
1344
1345 TIME_INT_EXIT /* do timing */
1346
1347 popq %gs:CPU_INT_STATE /* reset/clear intr state pointer */
1348 popq %rsp /* switch back to old stack */
1349
1350 movq %gs:CPU_ACTIVE_THREAD,%rax
1351 movq TH_PCB_FPS(%rax),%rax /* get pcb's ifps */
1352 cmpq $0,%rax /* Is there a context */
1353 je 1f /* Branch if not */
1354 movl FP_VALID(%rax),%eax /* Load fp_valid */
1355 cmpl $0,%eax /* Check if valid */
1356 jne 1f /* Branch if valid */
1357 clts /* Clear TS */
1358 jmp 2f
1359 1:
1360 mov %cr0,%rax /* get cr0 */
1361 orl $(CR0_TS),%eax /* or in TS bit */
1362 mov %rax,%cr0 /* set cr0 */
1363 2:
1364 /* Load interrupted code segment into %eax */
1365 movl R32_CS(%r15),%eax /* assume 32-bit state */
1366 cmpl $(SS_64),SS_FLAVOR(%r15)/* 64-bit? */
1367 #if DEBUG_IDT64
1368 jne 4f
1369 movl R64_CS(%r15),%eax /* 64-bit user mode */
1370 jmp 3f
1371 4:
1372 cmpl $(SS_32),SS_FLAVOR(%r15)
1373 je 3f
1374 POSTCODE2(0x6431)
1375 CCALL1(panic_idt64, %r15)
1376 hlt
1377 #else
1378 jne 3f
1379 movl R64_CS(%r15),%eax /* 64-bit user mode */
1380 #endif
1381 3:
1382 testb $3,%al /* user mode, */
1383 jnz ast_from_interrupt_user /* go handle potential ASTs */
1384 /*
1385 * we only want to handle preemption requests if
1386 * the interrupt fell in the kernel context
1387 * and preemption isn't disabled
1388 */
1389 movl %gs:CPU_PENDING_AST,%eax
1390 testl $(AST_URGENT),%eax /* any urgent requests? */
1391 je ret_to_kernel /* no, nothing to do */
1392
1393 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
1394 jne ret_to_kernel /* yes, skip it */
1395
1396 /*
1397 * Take an AST from kernel space. We don't need (and don't want)
1398 * to do as much as the case where the interrupt came from user
1399 * space.
1400 */
1401 CCALL(ast_taken_kernel)
1402
1403 mov %rsp, %r15 /* AST changes stack, saved state */
1404 jmp ret_to_kernel
1405
1406
1407 /*
1408 * nested int - simple path, can't preempt etc on way out
1409 */
1410 int_from_intstack:
1411 incl %gs:CPU_PREEMPTION_LEVEL
1412 incl %gs:CPU_INTERRUPT_LEVEL
1413 incl %gs:CPU_NESTED_ISTACK
1414
1415 push %gs:CPU_INT_STATE
1416 mov %r15, %gs:CPU_INT_STATE
1417
1418 CCALL1(interrupt, %r15)
1419
1420 pop %gs:CPU_INT_STATE
1421
1422 decl %gs:CPU_INTERRUPT_LEVEL
1423 decl %gs:CPU_PREEMPTION_LEVEL
1424 decl %gs:CPU_NESTED_ISTACK
1425
1426 jmp ret_to_kernel
1427
1428 /*
1429 * Take an AST from an interrupted user
1430 */
1431 ast_from_interrupt_user:
1432 movl %gs:CPU_PENDING_AST,%eax
1433 testl %eax,%eax /* pending ASTs? */
1434 je EXT(ret_to_user) /* no, nothing to do */
1435
1436 TIME_TRAP_UENTRY
1437
1438 movl $1, %ecx /* check if we're in the PFZ */
1439 jmp L_return_from_trap_with_ast /* return */
1440
1441
1442 /* Syscall dispatch routines! */
1443
1444 /*
1445 *
1446 * 32bit Tasks
1447 * System call entries via INTR_GATE or sysenter:
1448 *
1449 * r15 x86_saved_state32_t
1450 * rsp kernel stack
1451 *
1452 * both rsp and r15 are 16-byte aligned
1453 * interrupts disabled
1454 * direction flag cleared
1455 */
1456
1457 Entry(hndl_sysenter)
1458 /*
1459 * We can be here either for a mach syscall or a unix syscall,
1460 * as indicated by the sign of the code:
1461 */
1462 movl R32_EAX(%r15),%eax
1463 testl %eax,%eax
1464 js EXT(hndl_mach_scall) /* < 0 => mach */
1465 /* > 0 => unix */
1466
1467 Entry(hndl_unix_scall)
1468
1469 TIME_TRAP_UENTRY
1470
1471 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1472 movq TH_TASK(%rcx),%rbx /* point to current task */
1473 incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
1474
1475 /* Check for active vtimers in the current task */
1476 TASK_VTIMER_CHECK(%rbx,%rcx)
1477
1478 sti
1479
1480 CCALL1(unix_syscall, %r15)
1481 /*
1482 * always returns through thread_exception_return
1483 */
1484
1485
1486 Entry(hndl_mach_scall)
1487 TIME_TRAP_UENTRY
1488
1489 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1490 movq TH_TASK(%rcx),%rbx /* point to current task */
1491 incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
1492
1493 /* Check for active vtimers in the current task */
1494 TASK_VTIMER_CHECK(%rbx,%rcx)
1495
1496 sti
1497
1498 CCALL1(mach_call_munger, %r15)
1499 /*
1500 * always returns through thread_exception_return
1501 */
1502
1503
1504 Entry(hndl_mdep_scall)
1505 TIME_TRAP_UENTRY
1506
1507 /* Check for active vtimers in the current task */
1508 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1509 movq TH_TASK(%rcx),%rbx /* point to current task */
1510 TASK_VTIMER_CHECK(%rbx,%rcx)
1511
1512 sti
1513
1514 CCALL1(machdep_syscall, %r15)
1515 /*
1516 * always returns through thread_exception_return
1517 */
1518
1519 /*
1520 * 64bit Tasks
1521 * System call entries via syscall only:
1522 *
1523 * r15 x86_saved_state64_t
1524 * rsp kernel stack
1525 *
1526 * both rsp and r15 are 16-byte aligned
1527 * interrupts disabled
1528 * direction flag cleared
1529 */
1530
1531 Entry(hndl_syscall)
1532 TIME_TRAP_UENTRY
1533
1534 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1535 movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling syscall */
1536 movq TH_TASK(%rcx),%rbx /* point to current task */
1537
1538 /* Check for active vtimers in the current task */
1539 TASK_VTIMER_CHECK(%rbx,%rcx)
1540
1541 /*
1542 * We can be here either for a mach, unix, machdep or diag syscall,
1543 * as indicated by the syscall class:
1544 */
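/*
 * Syscall-class decoding sketch (illustrative). For 64-bit tasks the class is
 * encoded in the upper bits of the syscall number (SYSCALL_CLASS_MASK and
 * SYSCALL_CLASS_SHIFT from syscall_sw.h); e.g. a BSD write() arrives as
 * (SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | SYS_write:
 *
 *	unsigned int num = (unsigned int)state->rax;
 *	switch ((num & SYSCALL_CLASS_MASK) >> SYSCALL_CLASS_SHIFT) {
 *	case SYSCALL_CLASS_MACH:	hndl_mach_scall64();	break;
 *	case SYSCALL_CLASS_UNIX:	hndl_unix_scall64();	break;
 *	case SYSCALL_CLASS_MDEP:	hndl_mdep_scall64();	break;
 *	case SYSCALL_CLASS_DIAG:	hndl_diag_scall64();	break;
 *	default: i386_exception(EXC_SYSCALL, num, 1);	// unknown class
 *	}
 */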
1545 movl R64_RAX(%r15), %eax /* syscall number/class */
1546 movl %eax, %edx
1547 andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */
1548 cmpl $(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
1549 je EXT(hndl_mach_scall64)
1550 cmpl $(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
1551 je EXT(hndl_unix_scall64)
1552 cmpl $(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
1553 je EXT(hndl_mdep_scall64)
1554 cmpl $(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
1555 je EXT(hndl_diag_scall64)
1556
1557 /* Syscall class unknown */
1558 sti
1559 CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
1560 /* no return */
1561
1562
1563 Entry(hndl_unix_scall64)
1564 incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
1565 sti
1566
1567 CCALL1(unix_syscall64, %r15)
1568 /*
1569 * always returns through thread_exception_return
1570 */
1571
1572
1573 Entry(hndl_mach_scall64)
1574 incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
1575 sti
1576
1577 CCALL1(mach_call_munger64, %r15)
1578 /*
1579 * always returns through thread_exception_return
1580 */
1581
1582
1583
1584 Entry(hndl_mdep_scall64)
1585 sti
1586
1587 CCALL1(machdep_syscall64, %r15)
1588 /*
1589 * always returns through thread_exception_return
1590 */
1591
1592 Entry(hndl_diag_scall64)
1593 CCALL1(diagCall64, %r15) // Call diagnostics
1594 test %eax, %eax // What kind of return is this?
1595 je 1f // - branch if bad (zero)
1596 jmp EXT(return_to_user) // Normal return, do not check asts...
1597 1:
1598 sti
1599 CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
1600 /* no return */
1601 /* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
1602 Entry(hndl_machine_check)
1603 /* Adjust SP and savearea to their canonical, non-aliased addresses */
1604 CCALL1(panic_machine_check64, %r15)
1605 hlt
1606
1607 Entry(hndl_double_fault)
1608 CCALL1(panic_double_fault64, %r15)
1609 hlt