1 /*
2 * Copyright (c) 2010-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <i386/asm.h>
29 #include <assym.s>
30 #include <debug.h>
31 #include <i386/eflags.h>
32 #include <i386/rtclock_asm.h>
33 #include <i386/trap.h>
34 #define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */
35 #include <mach/i386/syscall_sw.h>
36 #include <i386/postcode.h>
37 #include <i386/proc_reg.h>
38 #include <mach/exception_types.h>
39
40 #if DEBUG
41 #define DEBUG_IDT64 1
42 #endif
43
44 /*
45 * This is the low-level trap and interrupt handling code associated with
46 * the IDT. It also includes system call handlers for sysenter/syscall.
47 * The IDT itself is defined in mp_desc.c.
48 *
49 * Code here is structured as follows:
50 *
51 * stubs Code called directly from an IDT vector.
52 * All entry points have the "idt64_" prefix and they are built
53 * using macros expanded by the inclusion of idt_table.h.
54 * This code performs vector-dependent identification and jumps
55 * into the dispatch code.
56 *
57 * dispatch The dispatch code is responsible for saving the thread state
58 * (which is either 64-bit or 32-bit) and then jumping to the
59 * class handler identified by the stub.
60 *
61 * returns Code to restore state and return to the previous context.
62 *
63 * handlers There are several classes of handlers:
64 * interrupt - asynchronous events typically from external devices
65 * trap - synchronous events due to thread execution
66 * syscall - synchronous system call request
67 * fatal - fatal traps
68 */
69 /*
70 * Indices of handlers for each exception type.
71 */
72 #define HNDL_ALLINTRS 0
73 #define HNDL_ALLTRAPS 1
74 #define HNDL_SYSENTER 2
75 #define HNDL_SYSCALL 3
76 #define HNDL_UNIX_SCALL 4
77 #define HNDL_MACH_SCALL 5
78 #define HNDL_MDEP_SCALL 6
79 #define HNDL_DOUBLE_FAULT 7
80 #define HNDL_MACHINE_CHECK 8
81
82 /* Begin double-mapped descriptor section */
83
84 .section __HIB, __desc
85 .globl EXT(idt64_hndl_table0)
86 EXT(idt64_hndl_table0):
87 /* 0x00 */ .quad EXT(ks_dispatch)
88 /* 0x08 */ .quad EXT(ks_64bit_return)
89 /* 0x10 */ .quad 0 /* Populated with CPU shadow displacement*/
90 /* 0x18 */ .quad EXT(ks_return)
91 #define TBL0_OFF_DISP_USER_WITH_POPRAX 0x20
92 /* 0x20 */ .quad EXT(ks_dispatch_user_with_pop_rax)
93 #define TBL0_OFF_DISP_KERN_WITH_POPRAX 0x28
94 /* 0x28 */ .quad EXT(ks_dispatch_kernel_with_pop_rax)
95 #define TBL0_OFF_PTR_KERNEL_STACK_MASK 0x30
96 /* 0x30 */ .quad 0 /* &kernel_stack_mask */
97
98 EXT(idt64_hndl_table1):
99 .quad EXT(hndl_allintrs)
100 .quad EXT(hndl_alltraps)
101 .quad EXT(hndl_sysenter)
102 .quad EXT(hndl_syscall)
103 .quad EXT(hndl_unix_scall)
104 .quad EXT(hndl_mach_scall)
105 .quad EXT(hndl_mdep_scall)
106 .quad EXT(hndl_double_fault)
107 .quad EXT(hndl_machine_check)
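/*
 * Each stub pushes one of the HNDL_* indices defined above; L_common_dispatch
 * later uses that index (held in %rdx) to select the handler from this table
 * via "jmp *(%rax, %rdx, 8)".
 */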
108 .text
109
110
111 /* The wrapper for all non-special traps/interrupts */
112 /* Everything up to the handler-index push is just to output
113  * the interrupt number to the postcode display
114 */
115 #if DEBUG_IDT64
116 #define IDT_ENTRY_WRAPPER(n, f) \
117 push %rax ;\
118 POSTCODE2(0x6400+n) ;\
119 pop %rax ;\
120 pushq $(f) ;\
121 pushq $(n) ;\
122 jmp L_dispatch
123 #else
124 #define IDT_ENTRY_WRAPPER(n, f) \
125 pushq $(f) ;\
126 pushq $(n) ;\
127 jmp L_dispatch
128 #endif
129
130 /* A trap that comes with an error code already on the stack */
131 #define TRAP_ERR(n, f) \
132 Entry(f) ;\
133 IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)
134
135 /* A normal trap */
136 #define TRAP(n, f) \
137 Entry(f) ;\
138 pushq $0 ;\
139 IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)
140
141 #define USER_TRAP TRAP
142
143 /* An interrupt */
144 #define INTERRUPT(n) \
145 Entry(_intr_ ## n) ;\
146 pushq $0 ;\
147 IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)
148
149 /* A trap with a special-case handler, hence we don't need to define anything */
150 #define TRAP_SPC(n, f)
151 #define TRAP_IST1(n, f)
152 #define TRAP_IST2(n, f)
153 #define USER_TRAP_SPC(n, f)
154
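/*
 * For reference, a sketch of what a plain (non-DEBUG) TRAP(n, f) stub
 * expands to after macro substitution:
 *
 *	Entry(f)
 *	pushq	$0			// dummy error code
 *	pushq	$(HNDL_ALLTRAPS)	// handler index
 *	pushq	$(n)			// vector number
 *	jmp	L_dispatch
 */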
155 /* Begin double-mapped text section */
156 .section __HIB, __text
157 /* Generate all the stubs */
158 #include "idt_table.h"
159
160 Entry(idt64_page_fault)
161 pushq $(HNDL_ALLTRAPS)
162 push $(T_PAGE_FAULT)
163 jmp L_dispatch
164
165 /*
166 * #DB handler, which runs on IST1, will treat as spurious any #DB received while executing in the
167 * kernel while not on the kernel's gsbase.
168 */
169 Entry(idt64_debug)
170 /* Synthesize common interrupt stack frame */
171 push $0 /* error code */
172 pushq $(HNDL_ALLTRAPS)
173 pushq $(T_DEBUG)
174 /* Spill prior to RDMSR */
175 push %rax
176 push %rcx
177 push %rdx
178 mov $(MSR_IA32_GS_BASE), %ecx
179 rdmsr /* Check contents of GSBASE MSR */
180 test $0x80000000, %edx /* MSB set? Already swapped to kernel's */
181 jnz 1f
182
183 /*
184 * If we're not already swapped to the kernel's gsbase AND this #DB originated from kernel space,
185 * it must have happened within the very small window on entry or exit before or after (respectively)
186 * swapgs occurred. In those cases, consider the #DB spurious and immediately return.
187 */
188 testb $3, 8+8+8+ISF64_CS(%rsp)
189 jnz 2f
190 pop %rdx
191 pop %rcx
192 pop %rax
193 addq $0x18, %rsp /* Remove synthesized interrupt stack frame */
194 jmp EXT(ret64_iret)
195 2:
196 swapgs /* direct from user */
197 1:
198 pop %rdx
199
200 leaq EXT(idt64_hndl_table0)(%rip), %rax
201 mov 16(%rax), %rax /* Offset of per-CPU shadow */
202
203 mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
204 mov %rax, %cr3
205
206 pop %rcx
207
208 /* Note that %rax will be popped from the stack in ks_dispatch, below */
209
210 leaq EXT(idt64_hndl_table0)(%rip), %rax
211 jmp *(%rax)
212
213 /*
214  * Legacy interrupt-gate system call handlers.
215 * These are entered via a syscall interrupt. The system call number in %rax
216 * is saved to the error code slot in the stack frame. We then branch to the
217 * common state saving code.
218 */
219
220 #ifndef UNIX_INT
221 #error NO UNIX INT!!!
222 #endif
223 Entry(idt64_unix_scall)
224 pushq %rax /* save system call number */
225 pushq $(HNDL_UNIX_SCALL)
226 pushq $(UNIX_INT)
227 jmp L_u64bit_entry_check
228
229 Entry(idt64_mach_scall)
230 pushq %rax /* save system call number */
231 pushq $(HNDL_MACH_SCALL)
232 pushq $(MACH_INT)
233 jmp L_u64bit_entry_check
234
235 Entry(idt64_mdep_scall)
236 pushq %rax /* save system call number */
237 pushq $(HNDL_MDEP_SCALL)
238 pushq $(MACHDEP_INT)
239 jmp L_u64bit_entry_check
240
241 /*
242 * For GP/NP/SS faults, we use the IST1 stack.
243 * For faults from user-space, we have to copy the machine state to the
244 * PCB stack and then dispatch as normal.
245 * For faults in kernel-space, we need to scrub for kernel exit faults and
246 * treat these as user-space faults. But for all other kernel-space faults
247 * we continue to run on the IST1 stack and we dispatch to handle the fault
248 * as fatal.
249 */
250 Entry(idt64_gen_prot)
251 pushq $(HNDL_ALLTRAPS)
252 pushq $(T_GENERAL_PROTECTION)
253 jmp L_check_for_kern_flt
254
255 Entry(idt64_stack_fault)
256 pushq $(HNDL_ALLTRAPS)
257 pushq $(T_STACK_FAULT)
258 jmp L_check_for_kern_flt
259
260 L_check_for_kern_flt:
261 /*
262 * If we took a #GP or #SS from the kernel, check if we took them
263 * from either ret32_iret or ret64_iret. If we did, we need to
264 * jump into L_dispatch at the swapgs so that the code in L_dispatch
265 * can proceed with the correct GSbase.
266 */
267 pushq %rax
268 testb $3, 8+ISF64_CS(%rsp)
269 jnz L_dispatch_from_user_no_push_rax /* Fault from user, go straight to dispatch */
270 leaq EXT(ret32_iret)(%rip), %rax
271 cmpq %rax, 8+ISF64_RIP(%rsp)
272 je 1f
273 leaq EXT(ret64_iret)(%rip), %rax
274 cmpq %rax, 8+ISF64_RIP(%rsp)
275 je 1f
276 jmp L_dispatch_from_kernel_no_push_rax
277 /*
278 * We hit the fault on iretq, so check the original return %cs. If
279  * it's a user %cs, fix up the stack and then jump to dispatch.
280  *
281  * With this type of fault, the stack is laid out as follows:
282 *
283 *
284 * orig %ss saved_rsp+32
285 * orig %rsp saved_rsp+24
286 * orig %rflags saved_rsp+16
287 * orig %cs saved_rsp+8
288 * orig %rip saved_rsp
289 * ^^^^^^^^^ (maybe on another stack, since we switched to IST1)
290 * %ss +64 -8
291 * saved_rsp +56 -16
292 * %rflags +48 -24
293 * %cs +40 -32
294 * %rip +32 -40
295 * error code +24 -48
296  * handler +16 -56
297 * trap number +8 -64
298 * <saved %rax> <== %rsp -72
299 */
300 1:
301 pushq %rbx
302 movq 16+ISF64_RSP(%rsp), %rbx
303 movq ISF64_CS-24(%rbx), %rax
304 testb $3, %al /* If the original return destination was to user */
305 jnz 2f
306 popq %rbx
307 jmp L_dispatch_from_kernel_no_push_rax /* Fault occurred when trying to return to kernel */
308 2:
309 /*
310 * Fix the stack so the original trap frame is current, then jump to dispatch
311 */
312 movq %rax, 16+ISF64_CS(%rsp)
313
314 movq ISF64_RSP-24(%rbx), %rax
315 movq %rax, 16+ISF64_RSP(%rsp)
316
317 movq ISF64_RIP-24(%rbx), %rax
318 movq %rax, 16+ISF64_RIP(%rsp)
319
320 movq ISF64_SS-24(%rbx), %rax
321 movq %rax, 16+ISF64_SS(%rsp)
322
323 movq ISF64_RFLAGS-24(%rbx), %rax
324 movq %rax, 16+ISF64_RFLAGS(%rsp)
325
326 popq %rbx
327 jmp L_dispatch_from_user_no_push_rax
328
329 Entry(idt64_segnp)
330 pushq $(HNDL_ALLTRAPS)
331 pushq $(T_SEGMENT_NOT_PRESENT)
332 jmp L_dispatch
333
334 /*
335 * Fatal exception handlers:
336 */
337 Entry(idt64_db_task_dbl_fault)
338 pushq $(HNDL_DOUBLE_FAULT)
339 pushq $(T_DOUBLE_FAULT)
340 jmp L_dispatch
341
342 Entry(idt64_db_task_stk_fault)
343 pushq $(HNDL_DOUBLE_FAULT)
344 pushq $(T_STACK_FAULT)
345 jmp L_dispatch
346
347 Entry(idt64_mc)
348 push $(0) /* Error */
349 pushq $(HNDL_MACHINE_CHECK)
350 pushq $(T_MACHINE_CHECK)
351 jmp L_dispatch
352
353 /*
354 * NMI
355  * This may or may not be fatal, but extreme care is required
356  * because it may arrive while control is already in another trampoline.
357  *
358  * We get here on the IST2 stack, which is used exclusively for NMIs.
359  * Machine checks, double faults and similar use IST1.
360 */
361 Entry(idt64_nmi)
362 push %rax
363 push %rcx
364 push %rdx
365 testb $3, ISF64_CS(%rsp)
366 jz 1f
367
368 /* From user-space: copy interrupt state to user PCB */
369 swapgs
370
371 leaq EXT(idt64_hndl_table0)(%rip), %rax
372 mov 16(%rax), %rax /* Offset of per-CPU shadow */
373 mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
374 mov %rax, %cr3 /* note that SMAP is enabled in L_common_dispatch (on Broadwell+) */
375
376 mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */
377 add $(ISF64_SIZE), %rcx /* adjust to base of ISF */
378
379 leaq TBL0_OFF_DISP_USER_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax /* ks_dispatch_user_with_pop_rax */
380 jmp 4f /* Copy state to PCB */
381
382 1:
383 /*
384 * From kernel-space:
385  * Determine whether the kernel or user GS base is currently active.
386  * If the user's is, swap to the kernel's and set the high 32 bits of the saved CS to 1 so that we swapgs back correctly at IRET.
387 */
388 mov $(MSR_IA32_GS_BASE), %ecx
389 rdmsr /* read kernel gsbase */
390 test $0x80000000, %edx /* test MSB of address */
391 jnz 2f
392 swapgs /* so swap */
393 movl $1, ISF64_CS+4(%rsp) /* and set flag in CS slot */
394 2:
395
396 leaq EXT(idt64_hndl_table0)(%rip), %rax
397 mov 16(%rax), %rax /* Offset of per-CPU shadow */
398 mov %cr3, %rdx
399 mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
400 mov %rax, %cr3 /* Unconditionally switch to primary kernel pagetables */
401
402 /*
403 * Determine whether we're on the kernel or interrupt stack
404 * when the NMI hit.
405 */
406 mov ISF64_RSP(%rsp), %rcx
407 mov %gs:CPU_KERNEL_STACK, %rax
408 xor %rcx, %rax
409 movq TBL0_OFF_PTR_KERNEL_STACK_MASK+EXT(idt64_hndl_table0)(%rip), %rdx
410 mov (%rdx), %rdx /* Load kernel_stack_mask */
411 and %rdx, %rax
412 test %rax, %rax /* are we on the kernel stack? */
413 jz 3f /* yes */
414
415 mov %gs:CPU_INT_STACK_TOP, %rax
416 cmp %rcx, %rax /* are we on the interrupt stack? */
417 jb 5f /* no */
418 leaq -INTSTACK_SIZE(%rax), %rax
419 cmp %rcx, %rax
420 jb 3f /* yes */
421 5:
422 mov %gs:CPU_KERNEL_STACK, %rcx
423 3:
424 /* 16-byte-align kernel/interrupt stack for state push */
425 and $0xFFFFFFFFFFFFFFF0, %rcx
426
427 leaq TBL0_OFF_DISP_KERN_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax /* ks_dispatch_kernel_with_pop_rax */
428 4:
429 /*
430  * Copy state from the NMI stack (RSP) to the save area (RCX), which is
431  * the PCB when coming from user mode, or the kernel/interrupt stack when coming from the kernel.
432 * ISF64_ERR(RSP) saved RAX
433 * ISF64_TRAPFN(RSP) saved RCX
434 * ISF64_TRAPNO(RSP) saved RDX
435 */
436 xchg %rsp, %rcx /* set for pushes */
437 push ISF64_SS(%rcx)
438 push ISF64_RSP(%rcx)
439 push ISF64_RFLAGS(%rcx)
440 push ISF64_CS(%rcx)
441 push ISF64_RIP(%rcx)
442 /* Synthesize common interrupt stack frame */
443 push $(0) /* error code 0 */
444 push $(HNDL_ALLINTRS) /* trapfn allintrs */
445 push $(T_NMI) /* trapno T_NMI */
446 push ISF64_ERR(%rcx) /* saved %rax is popped in ks_dispatch_{kernel|user}_with_pop_rax */
447 mov ISF64_TRAPNO(%rcx), %rdx
448 mov ISF64_TRAPFN(%rcx), %rcx
449
450 jmp *(%rax) /* ks_dispatch_{kernel|user}_with_pop_rax */
451
452 Entry(idt64_double_fault)
453 pushq $(HNDL_DOUBLE_FAULT)
454 pushq $(T_DOUBLE_FAULT)
455 jmp L_dispatch
456
457 Entry(hi64_syscall)
458 Entry(idt64_syscall)
459 swapgs
460 /* Use RAX as a temporary by shifting its contents into R11[32:63].
461  * The system call number is defined to be a 32-bit quantity, as is
462 * RFLAGS.
463 */
464 shlq $32, %rax
465 or %rax, %r11
466 .globl EXT(dblsyscall_patch_point)
467 EXT(dblsyscall_patch_point):
468 // movabsq $0x12345678ABCDEFFFULL, %rax
469 /* Generate offset to the double-mapped per-CPU data shadow
470 * into RAX
471 */
472 leaq EXT(idt64_hndl_table0)(%rip), %rax
473 mov 16(%rax), %rax
474 mov %rsp, %gs:CPU_UBER_TMP(%rax) /* save user stack */
475 mov %gs:CPU_ESTACK(%rax), %rsp /* switch stack to per-cpu estack */
476 sub $(ISF64_SIZE), %rsp
477
478 /*
479 * Synthesize an ISF frame on the exception stack
480 */
481 movl $(USER_DS), ISF64_SS(%rsp)
482 mov %rcx, ISF64_RIP(%rsp) /* rip */
483
484 mov %gs:CPU_UBER_TMP(%rax), %rcx
485 mov %rcx, ISF64_RSP(%rsp) /* user stack --changed */
486
487 mov %r11, %rax
488 shrq $32, %rax /* Restore RAX */
489 mov %r11d, %r11d /* Clear r11[32:63] */
490
491 mov %r11, ISF64_RFLAGS(%rsp) /* rflags */
492 movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */
493 mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */
494 movq $(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
495 movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */
496 swapgs
497 jmp L_dispatch /* this can only be 64-bit */
498
499 Entry(hi64_sysenter)
500 Entry(idt64_sysenter)
501 /* Synthesize an interrupt stack frame onto the
502 * exception stack.
503 */
504 push $(USER_DS) /* ss */
505 push %rcx /* uesp */
506 pushf /* flags */
507 /*
508  * Clear, among others, the Nested Task (NT) flag;
509  * it is zeroed by INT, but not by SYSENTER.
510 */
511 push $0
512 popf
513 push $(SYSENTER_CS) /* cs */
514 L_sysenter_continue:
515 push %rdx /* eip */
516 push %rax /* err/eax - syscall code */
517 pushq $(HNDL_SYSENTER)
518 pushq $(T_SYSENTER)
519 orl $(EFL_IF), ISF64_RFLAGS(%rsp)
520 jmp L_u64bit_entry_check
521
522 /*
523 * Common dispatch point.
524 * Determine what mode has been interrupted and save state accordingly.
525 * Here with:
526 * rsp from user-space: interrupt state in PCB, or
527 * from kernel-space: interrupt state in kernel or interrupt stack
528 * GSBASE from user-space: pthread area, or
529 * from kernel-space: cpu_data
530 */
531
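/*
 * Three entries below: L_dispatch (pushes %rax and tests the saved CS), plus
 * L_dispatch_from_user_no_push_rax and L_dispatch_from_kernel_no_push_rax for
 * callers that have already pushed %rax and classified the origin.
 */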
532 L_dispatch:
533 pushq %rax
534 testb $3, 8+ISF64_CS(%rsp)
535 jz 1f
536 L_dispatch_from_user_no_push_rax:
537 swapgs
538 leaq EXT(idt64_hndl_table0)(%rip), %rax
539 mov 16(%rax), %rax
540 L_dispatch_kgsb:
541 mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
542 mov %rax, %cr3
543 #if DEBUG
544 mov %rax, %gs:CPU_ENTRY_CR3
545 #endif
546 L_dispatch_from_kernel_no_push_rax:
547 1:
548 leaq EXT(idt64_hndl_table0)(%rip), %rax
549 /* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
550 /* Indirect branch to non-doublemapped trampolines */
551 jmp *(%rax)
552 /* User return: register restoration and address space switch sequence */
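/*
 * On entry %r15 points at the x86_saved_state64_t savearea. The GPRs are
 * reloaded from it, an IRET frame is synthesized on the per-CPU exception
 * stack, and the exit path then chooses between iretq and sysretq
 * (see L_chk_sysret) based on the saved %cs.
 */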
553 Entry(ks_64bit_return)
554 mov R64_R14(%r15), %r14
555 mov R64_R13(%r15), %r13
556 mov R64_R12(%r15), %r12
557 mov R64_R11(%r15), %r11
558 mov R64_R10(%r15), %r10
559 mov R64_R9(%r15), %r9
560 mov R64_R8(%r15), %r8
561 mov R64_RSI(%r15), %rsi
562 mov R64_RDI(%r15), %rdi
563 mov R64_RBP(%r15), %rbp
564 mov R64_RDX(%r15), %rdx
565 mov R64_RCX(%r15), %rcx
566 mov R64_RBX(%r15), %rbx
567 mov R64_RAX(%r15), %rax
568 /* Switch to per-CPU exception stack */
569 mov %gs:CPU_ESTACK, %rsp
570
571 /* Synthesize interrupt stack frame from PCB savearea to exception stack */
572 push R64_SS(%r15)
573 push R64_RSP(%r15)
574 push R64_RFLAGS(%r15)
575 push R64_CS(%r15)
576 push R64_RIP(%r15)
577
578 cmpq $(KERNEL64_CS), 8(%rsp)
579 jne 1f /* Returning to user (%r15 will be restored after the segment checks) */
580 mov R64_R15(%r15), %r15
581 jmp L_64b_kernel_return /* Returning to kernel */
582
583 1:
584 push %rax /* [A] */
585 movl %gs:CPU_NEED_SEGCHK, %eax
586 push %rax /* [B] */
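	/*
	 * [A] preserves the user %rax across the checks below; [B] latches
	 * CPU_NEED_SEGCHK, which L_chk_sysret tests to decide whether a VERW
	 * (see L_verw_island_2/3) is needed to scrub microarchitectural
	 * buffers before returning to user space.
	 */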
587
588 /* Returning to user */
589 cmpl $0, %gs:CPU_CURTASK_HAS_LDT /* If the current task has an LDT, check and restore segment regs */
590 jne L_64b_segops_island
591
592 /*
593 * Restore %r15, since we're now done accessing saved state
594 * and (%r15) won't be accessible after the %cr3 load anyway.
595 * Note that %r15 is restored below for the segment-restore
596 * case, just after we no longer need to access register state
597 * relative to %r15.
598 */
599 mov R64_R15(%r15), %r15
600
601 /*
602 * Note that this %cr3 sequence is duplicated here to save
603 * [at least] a load and comparison that would be required if
604 * this block were shared.
605 */
606 /* Discover user cr3/ASID */
607 mov %gs:CPU_UCR3, %rax
608 #if DEBUG
609 mov %rax, %gs:CPU_EXIT_CR3
610 #endif
611 mov %rax, %cr3
612 /* Continue execution on the shared/doublemapped trampoline */
613 swapgs
614
615 L_chk_sysret:
616 pop %rax /* Matched to [B], above (segchk required) */
617
618 /*
619 * At this point, the stack contains:
620 *
621 * +--------------+
622 * | Return SS | +40
623 * | Return RSP | +32
624 * | Return RFL | +24
625 * | Return CS | +16
626 * | Return RIP | +8
627 * | Saved RAX | <-- rsp
628 * +--------------+
629 */
630 cmpl $(SYSCALL_CS), 16(%rsp) /* test for exit via SYSRET */
631 je L_sysret
632
633 cmpl $1, %eax
634 je L_verw_island_2
635
636 pop %rax /* Matched to [A], above */
637
638 L_64b_kernel_return:
639 .globl EXT(ret64_iret)
640 EXT(ret64_iret):
641 iretq /* return from interrupt */
642
643
644 L_sysret:
645 cmpl $1, %eax
646 je L_verw_island_3
647
648 pop %rax /* Matched to [A], above */
649 /*
650 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
651 * rcx user rip
652 * r11 user rflags
653 * rsp user stack pointer
654 */
655 pop %rcx
656 add $8, %rsp
657 pop %r11
658 pop %rsp
659 sysretq /* return from system call */
660
661
662 L_verw_island_2:
663
664 pop %rax /* Matched to [A], above */
665 verw 40(%rsp) /* verw operates on the %ss value already on the stack */
666 jmp EXT(ret64_iret)
667
668
669 L_verw_island_3:
670
671 pop %rax /* Matched to [A], above */
672
673 /*
674 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
675 * rcx user rip
676 * r11 user rflags
677 * rsp user stack pointer
678 */
679 pop %rcx
680 add $8, %rsp
681 pop %r11
682 verw 8(%rsp) /* verw operates on the %ss value already on the stack */
683 pop %rsp
684 sysretq /* return from system call */
685
686
687 L_64b_segops_island:
688
689 /* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
690 /* Exempt "known good" statically configured selectors, e.g. USER64_CS and 0 */
691 cmpl $(USER64_CS), R64_CS(%r15)
692 jz 11f
693 larw R64_CS(%r15), %ax
694 jnz L_64_reset_cs
695 /* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
696 testw $0x800, %ax
697 jz L_64_reset_cs /* Update stored %cs with known-good selector if ZF == 1 */
698 jmp 11f
699 L_64_reset_cs:
700 movl $(USER64_CS), R64_CS(%r15)
701 11:
702 cmpl $0, R64_DS(%r15)
703 jz 22f
704 larw R64_DS(%r15), %ax
705 jz 22f
706 movl $0, R64_DS(%r15)
707 22:
708 cmpl $0, R64_ES(%r15)
709 jz 33f
710 larw R64_ES(%r15), %ax
711 jz 33f
712 movl $0, R64_ES(%r15)
713 33:
714 cmpl $0, R64_FS(%r15)
715 jz 44f
716 larw R64_FS(%r15), %ax
717 jz 44f
718 movl $0, R64_FS(%r15)
719 44:
720 cmpl $0, R64_GS(%r15)
721 jz 55f
722 larw R64_GS(%r15), %ax
723 jz 55f
724 movl $0, R64_GS(%r15)
725 55:
726 /*
727 * Pack the segment registers in %rax since (%r15) will not
728 * be accessible after the %cr3 switch.
729  * Only restore %gs if cthread_self is zero (this is indicated
730  * to the code below with a sentinel value of 0xFFFF).
731 */
732 mov %gs:CPU_ACTIVE_THREAD, %rax /* Get the active thread */
733 cmpq $0, TH_CTH_SELF(%rax)
734 je L_restore_gs
735 movw $0xFFFF, %ax
736 jmp 1f
737 L_restore_gs:
738 movw R64_GS(%r15), %ax
739 1:
740 shlq $16, %rax
741 movw R64_FS(%r15), %ax
742 shlq $16, %rax
743 movw R64_ES(%r15), %ax
744 shlq $16, %rax
745 movw R64_DS(%r15), %ax
746
747 /*
748 * Restore %r15, since we're done accessing saved state
749 * and (%r15) won't be accessible after the %cr3 switch.
750 */
751 mov R64_R15(%r15), %r15
752
753 /* Discover user cr3/ASID */
754 push %rax
755 mov %gs:CPU_UCR3, %rax
756 #if DEBUG
757 mov %rax, %gs:CPU_EXIT_CR3
758 #endif
759 mov %rax, %cr3
760 /* Continue execution on the shared/doublemapped trampoline */
761 pop %rax
762 swapgs
763
764 /*
765 * Returning to user; restore segment registers that might be used
766 * by compatibility-mode code in a 64-bit user process.
767 *
768 * Note that if we take a fault here, it's OK that we haven't yet
769 * popped %rax from the stack, because %rsp will be reset to
770 * the value pushed onto the exception stack (above).
771 */
772 movw %ax, %ds
773 shrq $16, %rax
774
775 movw %ax, %es
776 shrq $16, %rax
777
778 movw %ax, %fs
779 shrq $16, %rax
780
781 /*
782 * 0xFFFF is the sentinel set above that indicates we should
783  * not restore %gs, because GS.base was already set elsewhere
784  * (e.g. in act_machine_set_pcb or machine_thread_set_tsd_base).
785 */
786 cmpw $0xFFFF, %ax
787 je L_chk_sysret
788 movw %ax, %gs /* Restore %gs to user-set value */
789 jmp L_chk_sysret
790
791
792 L_u64bit_entry_check:
793 /*
794  * Check that this legacy (32-bit) entry point was not reached by a 64-bit task.
795 */
796 pushq %rax
797 swapgs
798 leaq EXT(idt64_hndl_table0)(%rip), %rax
799 mov 16(%rax), %rax
800
801 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP(%rax)
802 jne L_64bit_entry_reject
803 jmp L_dispatch_kgsb
804
805 L_64bit_entry_reject:
806 /*
807 * Here for a 64-bit user attempting an invalid kernel entry.
808 */
809 movq $(HNDL_ALLTRAPS), 8+ISF64_TRAPFN(%rsp)
810 movq $(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
811 jmp L_dispatch_kgsb
812
813 /* End of double-mapped TEXT */
814 .text
815
816 Entry(ks_dispatch)
817 popq %rax
818 cmpl $(KERNEL64_CS), ISF64_CS(%rsp)
819 je EXT(ks_dispatch_kernel)
820
821 mov %rax, %gs:CPU_UBER_TMP
822 mov %gs:CPU_UBER_ISF, %rax
823 add $(ISF64_SIZE), %rax
824
825 xchg %rsp, %rax
826 /* Memory-to-memory moves (ain't x86 wonderful):
827 * Transfer the exception frame from the per-CPU exception stack to the
828 * 'PCB' stack programmed at cswitch.
829 */
830 push ISF64_SS(%rax)
831 push ISF64_RSP(%rax)
832 push ISF64_RFLAGS(%rax)
833 push ISF64_CS(%rax)
834 push ISF64_RIP(%rax)
835 push ISF64_ERR(%rax)
836 push ISF64_TRAPFN(%rax)
837 push ISF64_TRAPNO(%rax)
838 mov %gs:CPU_UBER_TMP, %rax
839 jmp EXT(ks_dispatch_user)
840
841 Entry(ks_dispatch_user_with_pop_rax)
842 pop %rax
843 jmp EXT(ks_dispatch_user)
844
845 Entry (ks_return)
846 jmp .
847
848 Entry(ks_dispatch_user)
849 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
850 je L_dispatch_U32 /* 32-bit user task */
851
852 L_dispatch_U64:
853 subq $(ISS64_OFFSET), %rsp
854 mov %r15, R64_R15(%rsp)
855 mov %rsp, %r15
856 mov %gs:CPU_KERNEL_STACK, %rsp
857 jmp L_dispatch_64bit
858
859 Entry(ks_dispatch_kernel_with_pop_rax)
860 pop %rax
861 jmp EXT(ks_dispatch_kernel)
862
863 Entry(ks_dispatch_kernel)
864 subq $(ISS64_OFFSET), %rsp
865 mov %r15, R64_R15(%rsp)
866 mov %rsp, %r15
867
868 /*
869 * Here for 64-bit user task or kernel
870 */
871 L_dispatch_64bit:
872 movl $(SS_64), SS_FLAVOR(%r15)
873
874 /*
875 * Save segment regs if a 64-bit task has
876 * installed customized segments in the LDT
877 */
878 cmpl $0, %gs:CPU_CURTASK_HAS_LDT
879 je L_skip_save_extra_segregs
880
881 mov %ds, R64_DS(%r15)
882 mov %es, R64_ES(%r15)
883
884 L_skip_save_extra_segregs:
885 mov %fs, R64_FS(%r15)
886 mov %gs, R64_GS(%r15)
887
888
889 /* Save general-purpose registers */
890 mov %rax, R64_RAX(%r15)
891 mov %rbx, R64_RBX(%r15)
892 mov %rcx, R64_RCX(%r15)
893 mov %rdx, R64_RDX(%r15)
894 mov %rbp, R64_RBP(%r15)
895 mov %rdi, R64_RDI(%r15)
896 mov %rsi, R64_RSI(%r15)
897 mov %r8, R64_R8(%r15)
898 mov %r9, R64_R9(%r15)
899 mov %r10, R64_R10(%r15)
900 mov %r11, R64_R11(%r15)
901 mov %r12, R64_R12(%r15)
902 mov %r13, R64_R13(%r15)
903 mov %r14, R64_R14(%r15)
904
905 /* Zero unused GPRs. BX/DX/SI are clobbered elsewhere across the exception handler, and are skipped. */
906 xor %ecx, %ecx
907 xor %edi, %edi
908 xor %r8, %r8
909 xor %r9, %r9
910 xor %r10, %r10
911 xor %r11, %r11
912 xor %r12, %r12
913 xor %r13, %r13
914 xor %r14, %r14
915
916 /* cr2 is significant only for page-faults */
917 mov %cr2, %rax
918 mov %rax, R64_CR2(%r15)
919
920 L_dispatch_U64_after_fault:
921 mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
922 mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */
923 mov R64_CS(%r15), %esi /* %esi := cs for later */
924
925 jmp L_common_dispatch
926
927 L_dispatch_U32: /* 32-bit user task */
928 subq $(ISS64_OFFSET), %rsp
929 mov %rsp, %r15
930 mov %gs:CPU_KERNEL_STACK, %rsp
931 movl $(SS_32), SS_FLAVOR(%r15)
932
933 /*
934 * Save segment regs
935 */
936 mov %ds, R32_DS(%r15)
937 mov %es, R32_ES(%r15)
938 mov %fs, R32_FS(%r15)
939 mov %gs, R32_GS(%r15)
940
941 /*
942 * Save general 32-bit registers
943 */
944 mov %eax, R32_EAX(%r15)
945 mov %ebx, R32_EBX(%r15)
946 mov %ecx, R32_ECX(%r15)
947 mov %edx, R32_EDX(%r15)
948 mov %ebp, R32_EBP(%r15)
949 mov %esi, R32_ESI(%r15)
950 mov %edi, R32_EDI(%r15)
951
952 /* Unconditionally save cr2; only meaningful on page faults */
953 mov %cr2, %rax
954 mov %eax, R32_CR2(%r15)
955 /* Zero unused GPRs. BX/DX/SI/R15 are clobbered elsewhere across the exception handler, and are skipped. */
956 xor %ecx, %ecx
957 xor %edi, %edi
958 xor %r8, %r8
959 xor %r9, %r9
960 xor %r10, %r10
961 xor %r11, %r11
962 xor %r12, %r12
963 xor %r13, %r13
964 xor %r14, %r14
965
966 /*
967 * Copy registers already saved in the machine state
968 * (in the interrupt stack frame) into the compat save area.
969 */
970 mov R64_RIP(%r15), %eax
971 mov %eax, R32_EIP(%r15)
972 mov R64_RFLAGS(%r15), %eax
973 mov %eax, R32_EFLAGS(%r15)
974 mov R64_RSP(%r15), %eax
975 mov %eax, R32_UESP(%r15)
976 mov R64_SS(%r15), %eax
977 mov %eax, R32_SS(%r15)
978 L_dispatch_U32_after_fault:
979 mov R64_CS(%r15), %esi /* %esi := %cs for later */
980 mov %esi, R32_CS(%r15)
981 mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
982 mov %ebx, R32_TRAPNO(%r15)
983 mov R64_ERR(%r15), %eax
984 mov %eax, R32_ERR(%r15)
985 mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */
986
987 L_common_dispatch:
988 cld /* Ensure the direction flag is clear in the kernel */
989 cmpl $0, EXT(pmap_smap_enabled)(%rip)
990 je 1f
991 clac /* Clear EFLAGS.AC if SMAP is present/enabled */
992 1:
993 /*
994  * We mark the kernel's cr3 as "active" for TLB coherency evaluation.
995  * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
996  * we switch to the kernel's address space on entry. Also,
997  * if the global no_shared_cr3 is TRUE we switch to the kernel's cr3
998  * so that illicit accesses to userspace can be trapped.
999 */
1000 mov %gs:CPU_KERNEL_CR3, %rcx
1001 mov %rcx, %gs:CPU_ACTIVE_CR3
1002 test $3, %esi /* CS: user/kernel? */
1003 jz 2f /* skip CR3 reload if from kernel */
1004 xor %ebp, %ebp
1005 cmpl $0, %gs:CPU_PAGEZERO_MAPPED
1006 jnz 11f
1007 cmpl $0, EXT(no_shared_cr3)(%rip)
1008 je 2f
1009 11:
1010 xor %eax, %eax
1011 movw %gs:CPU_KERNEL_PCID, %ax
1012 or %rax, %rcx
1013 mov %rcx, %cr3 /* load kernel cr3 */
1014 jmp 4f
1015 2:
1016 /* Deferred processing of pending kernel address space TLB invalidations */
1017 mov %gs:CPU_ACTIVE_CR3+4, %rcx
1018 shr $32, %rcx
1019 testl %ecx, %ecx
1020 jz 4f
1021 movl $0, %gs:CPU_TLB_INVALID
1022 cmpb $0, EXT(invpcid_enabled)(%rip)
1023 jz L_cr4_island
1024 movl $2, %ecx
1025 invpcid %gs:CPU_IP_DESC, %rcx
1026 4:
1027 L_set_act:
1028 mov %gs:CPU_ACTIVE_THREAD, %rcx /* Get the active thread */
1029 testq %rcx, %rcx
1030 je L_intcnt
1031 movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap */
1032 cmpq $0, TH_PCB_IDS(%rcx) /* Is there a debug register state? */
1033 jnz L_dr7_island
1034 L_intcnt:
1035 incl %gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count
1036 /* Dispatch the designated handler */
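	/*
	 * If %rsp/%r15 still point into the double-mapped trampoline alias
	 * (i.e. within [dblmap_base, dblmap_max)), rebase them by dblmap_dist
	 * so that the handlers below operate on the canonical kernel addresses.
	 */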
1037 cmp EXT(dblmap_base)(%rip), %rsp
1038 jb 66f
1039 cmp EXT(dblmap_max)(%rip), %rsp
1040 jge 66f
1041 subq EXT(dblmap_dist)(%rip), %rsp
1042 subq EXT(dblmap_dist)(%rip), %r15
1043 66:
1044 leaq EXT(idt64_hndl_table1)(%rip), %rax
1045 jmp *(%rax, %rdx, 8)
1046
1047 L_cr4_island:
1048 	mov	%cr4, %rcx	/* RMWW CR4, for lack of an alternative */
1049 and $(~CR4_PGE), %rcx
1050 mov %rcx, %cr4
1051 or $(CR4_PGE), %rcx
1052 mov %rcx, %cr4
1053 jmp L_set_act
1054 L_dr7_island:
1055 xor %ecx, %ecx /* If so, reset DR7 (the control) */
1056 mov %rcx, %dr7
1057 jmp L_intcnt
1058 /*
1059 * Control is passed here to return to user.
1060 */
1061 Entry(return_to_user)
1062 TIME_TRAP_UEXIT
1063
1064 Entry(ret_to_user)
1065 mov %gs:CPU_ACTIVE_THREAD, %rdx
1066 cmpq $0, TH_PCB_IDS(%rdx) /* Is there a debug register context? */
1067 jnz L_dr_restore_island
1068 L_post_dr_restore:
1069 /*
1070 * We now mark the task's address space as active for TLB coherency.
1071 * Handle special cases such as pagezero-less tasks here.
1072 */
1073 mov %gs:CPU_TASK_CR3, %rcx
1074 mov %rcx, %gs:CPU_ACTIVE_CR3
1075 cmpl $0, %gs:CPU_PAGEZERO_MAPPED
1076 jnz L_cr3_switch_island
1077 movl EXT(no_shared_cr3)(%rip), %eax
1078 test %eax, %eax /* -no_shared_cr3 */
1079 jnz L_cr3_switch_island
1080
1081 L_cr3_switch_return:
1082 mov %gs:CPU_DR7, %rax /* Is there a debug control register?*/
1083 cmp $0, %rax
1084 je 4f
1085 mov %rax, %dr7 /* Set DR7 */
1086 movq $0, %gs:CPU_DR7
1087 4:
1088 cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
1089 je L_64bit_return
1090
1091 L_32bit_return:
1092 #if DEBUG_IDT64
1093 cmpl $(SS_32), SS_FLAVOR(%r15) /* 32-bit state? */
1094 je 1f
1095 cli
1096 POSTCODE2(0x6432)
1097 CCALL1(panic_idt64, %r15)
1098 1:
1099 #endif /* DEBUG_IDT64 */
1100
1101 /*
1102 * Restore registers into the machine state for iret.
1103  * Here on fault stack and PCB address in R15.
1104 */
1105 movl R32_EIP(%r15), %eax
1106 movl %eax, R64_RIP(%r15)
1107 movl R32_EFLAGS(%r15), %eax
1108 movl %eax, R64_RFLAGS(%r15)
1109 movl R32_CS(%r15), %eax
1110 movl %eax, R64_CS(%r15)
1111 movl R32_UESP(%r15), %eax
1112 movl %eax, R64_RSP(%r15)
1113 movl R32_SS(%r15), %eax
1114 movl %eax, R64_SS(%r15)
1115
1116 /* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
1117 /* Exempt "known good" statically configured selectors, e.g. USER_CS, USER_DS and 0 */
1118 cmpl $(USER_CS), R32_CS(%r15)
1119 jz 11f
1120 larw R32_CS(%r15), %ax
1121 jnz L_32_reset_cs
1122 /* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
1123 testw $0x800, %ax
1124 jz L_32_reset_cs /* Update stored %cs with known-good selector if ZF == 1 */
1125 jmp 11f
1126 L_32_reset_cs:
1127 movl $(USER_CS), R32_CS(%r15)
1128 11:
1129 cmpl $(USER_DS), R32_DS(%r15)
1130 jz 22f
1131 cmpl $0, R32_DS(%r15)
1132 jz 22f
1133 larw R32_DS(%r15), %ax
1134 jz 22f
1135 movl $(USER_DS), R32_DS(%r15)
1136 22:
1137 cmpl $(USER_DS), R32_ES(%r15)
1138 jz 33f
1139 cmpl $0, R32_ES(%r15)
1140 jz 33f
1141 larw R32_ES(%r15), %ax
1142 jz 33f
1143 movl $(USER_DS), R32_ES(%r15)
1144 33:
1145 cmpl $(USER_DS), R32_FS(%r15)
1146 jz 44f
1147 cmpl $0, R32_FS(%r15)
1148 jz 44f
1149 larw R32_FS(%r15), %ax
1150 jz 44f
1151 movl $(USER_DS), R32_FS(%r15)
1152 44:
1153 cmpl $(USER_CTHREAD), R32_GS(%r15)
1154 jz 55f
1155 cmpl $0, R32_GS(%r15)
1156 jz 55f
1157 larw R32_GS(%r15), %ax
1158 jz 55f
1159 movl $(USER_CTHREAD), R32_GS(%r15)
1160 55:
1161 /*
1162 * Restore general 32-bit registers
1163 */
1164 movl R32_EAX(%r15), %eax
1165 movl R32_EBX(%r15), %ebx
1166 movl R32_ECX(%r15), %ecx
1167 movl R32_EDX(%r15), %edx
1168 movl R32_EBP(%r15), %ebp
1169 movl R32_ESI(%r15), %esi
1170 movl R32_EDI(%r15), %edi
1171
1172 /*
1173 * Restore segment registers. A segment exception taken here will
1174 * push state on the IST1 stack and will not affect the "PCB stack".
1175 */
1176 mov %r15, %rsp /* Set the PCB as the stack */
1177 movl %gs:CPU_NEED_SEGCHK, %r14d /* %r14 will be restored below */
1178 swapgs
1179
1180 /* Zero 64-bit-exclusive GPRs to prevent data leaks */
1181 xor %r8, %r8
1182 xor %r9, %r9
1183 xor %r10, %r10
1184 xor %r11, %r11
1185 xor %r12, %r12
1186 xor %r13, %r13
1187 xor %r15, %r15
1188
1189 movw R32_DS(%rsp), %ds
1190 movw R32_ES(%rsp), %es
1191 movw R32_FS(%rsp), %fs
1192 movw R32_GS(%rsp), %gs
1193
1194 /* pop compat frame + trapno, trapfn and error */
1195 add $(ISS64_OFFSET)+8+8+8, %rsp
1196
1197 /*
1198 * At this point, the stack contains:
1199 *
1200 * +--------------+
1201 * | Return SS | +32
1202 * | Return RSP | +24
1203 * | Return RFL | +16
1204 * | Return CS | +8
1205 * | Return RIP | <-- rsp
1206 * +--------------+
1207 */
1208
1209 cmpl $(SYSENTER_CS), 8(%rsp)
1210 /* test for sysexit */
1211 je L_rtu_via_sysexit
1212
1213 cmpl $1, %r14d
1214 je L_verw_island
1215
1216 L_after_verw:
1217 xor %r14, %r14
1218
1219 .globl EXT(ret32_iret)
1220 EXT(ret32_iret):
1221 iretq /* return from interrupt */
1222
1223 L_verw_island:
1224 verw 32(%rsp)
1225 jmp L_after_verw
1226
1227 L_verw_island_1:
1228 verw 16(%rsp)
1229 jmp L_after_verw_1
1230
1231 L_rtu_via_sysexit:
1232 pop %rdx /* user return eip */
1233 pop %rcx /* pop and toss cs */
1234 andl $(~EFL_IF), (%rsp) /* clear interrupts enable, sti below */
1235
1236 /*
1237 * %ss is now at 16(%rsp)
1238 */
1239 cmpl $1, %r14d
1240 je L_verw_island_1
1241 L_after_verw_1:
1242 xor %r14, %r14
1243
1244 popf /* flags - carry denotes failure */
1245 pop %rcx /* user return esp */
1246
1247
1248 sti /* interrupts enabled after sysexit */
1249 sysexitl /* 32-bit sysexit */
1250
1251 L_dr_restore_island:
1252 movq TH_PCB_IDS(%rdx),%rax /* Obtain this thread's debug state */
1253 cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
1254 jne 1f
1255 movl DS_DR0(%rax), %ecx /* If so, load the 32 bit DRs */
1256 movq %rcx, %dr0
1257 movl DS_DR1(%rax), %ecx
1258 movq %rcx, %dr1
1259 movl DS_DR2(%rax), %ecx
1260 movq %rcx, %dr2
1261 movl DS_DR3(%rax), %ecx
1262 movq %rcx, %dr3
1263 movl DS_DR7(%rax), %ecx
1264 movq %rcx, %gs:CPU_DR7
1265 jmp 2f
1266 1:
1267 mov DS64_DR0(%rax), %rcx /* Load the full width DRs*/
1268 mov %rcx, %dr0
1269 mov DS64_DR1(%rax), %rcx
1270 mov %rcx, %dr1
1271 mov DS64_DR2(%rax), %rcx
1272 mov %rcx, %dr2
1273 mov DS64_DR3(%rax), %rcx
1274 mov %rcx, %dr3
1275 mov DS64_DR7(%rax), %rcx
1276 mov %rcx, %gs:CPU_DR7
1277 2:
1278 jmp L_post_dr_restore
1279 L_cr3_switch_island:
1280 xor %eax, %eax
1281 movw %gs:CPU_ACTIVE_PCID, %ax
1282 or %rax, %rcx
1283 mov %rcx, %cr3
1284 jmp L_cr3_switch_return
1285
1286 ret_to_kernel:
1287 #if DEBUG_IDT64
1288 cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
1289 je 1f
1290 cli
1291 POSTCODE2(0x6464)
1292 CCALL1(panic_idt64, %r15)
1293 hlt
1294 1:
1295 cmpl $(KERNEL64_CS), R64_CS(%r15)
1296 je 2f
1297 CCALL1(panic_idt64, %r15)
1298 hlt
1299 2:
1300 #endif
1301
1302 L_64bit_return:
1303 /*
1304 * Restore general 64-bit registers.
1305 * Here on fault stack and PCB address in R15.
1306 */
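	/*
	 * Slot 0x08 of idt64_hndl_table0 is ks_64bit_return, which lives in the
	 * double-mapped __HIB text above and performs the GPR restore and
	 * IRET/SYSRET sequence.
	 */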
1307 leaq EXT(idt64_hndl_table0)(%rip), %rax
1308 jmp *8(%rax)
1309
1310 /* All 'exceptions' enter hndl_alltraps, with:
1311 * r15 x86_saved_state_t address
1312 * rsp kernel stack if user-space, otherwise interrupt or kernel stack
1313 * esi cs at trap
1314 *
1315 * The rest of the state is set up as:
1316 * both rsp and r15 are 16-byte aligned
1317 * interrupts disabled
1318 * direction flag cleared
1319 */
1320 Entry(hndl_alltraps)
1321 mov %esi, %eax
1322 testb $3, %al
1323 jz trap_from_kernel
1324
1325 TIME_TRAP_UENTRY
1326
1327 /* Check for active vtimers in the current task */
1328 mov %gs:CPU_ACTIVE_THREAD, %rcx
1329 movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap/exception */
1330 mov TH_TASK(%rcx), %rbx
1331 TASK_VTIMER_CHECK(%rbx, %rcx)
1332
1333 CCALL1(user_trap, %r15) /* call user trap routine */
1334 /* user_trap() unmasks interrupts */
1335 cli /* hold off intrs - critical section */
1336 xorl %ecx, %ecx /* don't check if we're in the PFZ */
1337
1338
1339 Entry(return_from_trap)
1340 movq %gs:CPU_ACTIVE_THREAD,%r15 /* Get current thread */
1341 movl $-1, TH_IOTIER_OVERRIDE(%r15) /* Reset IO tier override to -1 before returning to userspace */
1342 cmpl $0, TH_RWLOCK_COUNT(%r15) /* Check if current thread has pending RW locks held */
1343 jz 1f
1344 xorq %rbp, %rbp /* clear framepointer */
1345 mov %r15, %rdi /* Set RDI to current thread */
1346 CCALL(lck_rw_clear_promotions_x86) /* Clear promotions if needed */
1347 1:
1348 movq TH_PCB_ISS(%r15), %r15 /* PCB stack */
1349 movl %gs:CPU_PENDING_AST,%eax
1350 testl %eax,%eax
1351 je EXT(return_to_user) /* branch if no AST */
1352
1353 L_return_from_trap_with_ast:
1354 testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */
1355 je 2f /* no, go handle the AST */
1356 cmpl $(SS_64), SS_FLAVOR(%r15) /* are we a 64-bit task? */
1357 je 1f
1358 /* no... 32-bit user mode */
1359 movl R32_EIP(%r15), %edi
1360 xorq %rbp, %rbp /* clear framepointer */
1361 CCALL(commpage_is_in_pfz32)
1362 testl %eax, %eax
1363 je 2f /* not in the PFZ... go service AST */
1364 movl %eax, R32_EBX(%r15) /* let the PFZ know we've pended an AST */
1365 jmp EXT(return_to_user)
1366 1:
1367 movq R64_RIP(%r15), %rdi
1368 xorq %rbp, %rbp /* clear framepointer */
1369 CCALL(commpage_is_in_pfz64)
1370 testl %eax, %eax
1371 je 2f /* not in the PFZ... go service AST */
1372 movl %eax, R64_RBX(%r15) /* let the PFZ know we've pended an AST */
1373 jmp EXT(return_to_user)
1374 2:
1375
1376 xorq %rbp, %rbp /* clear framepointer */
1377 CCALL(ast_taken_user) /* handle all ASTs (enables interrupts, may return via continuation) */
1378
1379 cli
1380 mov %rsp, %r15 /* AST changes stack, saved state */
1381 xorl %ecx, %ecx /* don't check if we're in the PFZ */
1382 jmp EXT(return_from_trap) /* and check again (rare) */
1383
1384 /*
1385 * Trap from kernel mode. No need to switch stacks.
1386  * Interrupts must be off here - we will restore them to their state at the
1387  * time of the trap as soon as it's safe to do so without recursing into preemption
1388 *
1389 */
1390 trap_from_kernel:
1391 movq %r15, %rdi /* saved state addr */
1392 pushq R64_RIP(%r15) /* Simulate a CALL from fault point */
1393 pushq %rbp /* Extend framepointer chain */
1394 movq %rsp, %rbp
1395 CCALLWITHSP(kernel_trap) /* to kernel trap routine */
1396 popq %rbp
1397 addq $8, %rsp
1398 mov %rsp, %r15 /* DTrace slides stack/saved-state */
1399 cli
1400
1401 movl %gs:CPU_PENDING_AST,%eax /* get pending asts */
1402 testl $(AST_URGENT),%eax /* any urgent preemption? */
1403 je ret_to_kernel /* no, nothing to do */
1404 cmpl $(T_PREEMPT),R64_TRAPNO(%r15)
1405 je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */
1406 testl $(EFL_IF),R64_RFLAGS(%r15) /* interrupts disabled? */
1407 je ret_to_kernel
1408 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
1409 jne ret_to_kernel
1410 movq %gs:CPU_KERNEL_STACK,%rax
1411 movq %rsp,%rcx
1412 xorq %rax,%rcx
1413 andq EXT(kernel_stack_mask)(%rip),%rcx
1414 testq %rcx,%rcx /* are we on the kernel stack? */
1415 jne ret_to_kernel /* no, skip it */
1416
1417 CCALL(ast_taken_kernel) /* take the AST */
1418
1419 mov %rsp, %r15 /* AST changes stack, saved state */
1420 jmp ret_to_kernel
1421
1422
1423 /*
1424 * All interrupts on all tasks enter here with:
1425 * r15 x86_saved_state_t
1426 * rsp kernel or interrupt stack
1427 * esi cs at trap
1428 *
1429 * both rsp and r15 are 16-byte aligned
1430 * interrupts disabled
1431 * direction flag cleared
1432 */
1433 Entry(hndl_allintrs)
1434 /*
1435 * test whether already on interrupt stack
1436 */
1437 movq %gs:CPU_INT_STACK_TOP,%rcx
1438 cmpq %rsp,%rcx
1439 jb 1f
1440 leaq -INTSTACK_SIZE(%rcx),%rdx
1441 cmpq %rsp,%rdx
1442 jb int_from_intstack
1443 1:
1444 xchgq %rcx,%rsp /* switch to interrupt stack */
1445
1446 mov %cr0,%rax /* get cr0 */
1447 orl $(CR0_TS),%eax /* or in TS bit */
1448 mov %rax,%cr0 /* set cr0 */
1449
1450 pushq %rcx /* save pointer to old stack */
1451 pushq %gs:CPU_INT_STATE /* save previous intr state */
1452 movq %r15,%gs:CPU_INT_STATE /* set intr state */
1453
1454 TIME_INT_ENTRY /* do timing */
1455
1456 /* Check for active vtimers in the current task */
1457 mov %gs:CPU_ACTIVE_THREAD, %rcx
1458 mov TH_TASK(%rcx), %rbx
1459 TASK_VTIMER_CHECK(%rbx, %rcx)
1460
1461 incl %gs:CPU_PREEMPTION_LEVEL
1462 incl %gs:CPU_INTERRUPT_LEVEL
1463
1464 CCALL1(interrupt, %r15) /* call generic interrupt routine */
1465
1466 .globl EXT(return_to_iret)
1467 LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */
1468
1469 decl %gs:CPU_INTERRUPT_LEVEL
1470 decl %gs:CPU_PREEMPTION_LEVEL
1471
1472 TIME_INT_EXIT /* do timing */
1473
1474 popq %gs:CPU_INT_STATE /* reset/clear intr state pointer */
1475 popq %rsp /* switch back to old stack */
1476
1477 movq %gs:CPU_ACTIVE_THREAD,%rax
1478 movq TH_PCB_FPS(%rax),%rax /* get pcb's ifps */
1479 cmpq $0,%rax /* Is there a context */
1480 je 1f /* Branch if not */
1481 movl FP_VALID(%rax),%eax /* Load fp_valid */
1482 cmpl $0,%eax /* Check if valid */
1483 jne 1f /* Branch if valid */
1484 clts /* Clear TS */
1485 jmp 2f
1486 1:
1487 mov %cr0,%rax /* get cr0 */
1488 orl $(CR0_TS),%eax /* or in TS bit */
1489 mov %rax,%cr0 /* set cr0 */
1490 2:
1491 /* Load interrupted code segment into %eax */
1492 movl R32_CS(%r15),%eax /* assume 32-bit state */
1493 cmpl $(SS_64),SS_FLAVOR(%r15)/* 64-bit? */
1494 #if DEBUG_IDT64
1495 jne 4f
1496 movl R64_CS(%r15),%eax /* 64-bit user mode */
1497 jmp 3f
1498 4:
1499 cmpl $(SS_32),SS_FLAVOR(%r15)
1500 je 3f
1501 POSTCODE2(0x6431)
1502 CCALL1(panic_idt64, %r15)
1503 hlt
1504 #else
1505 jne 3f
1506 movl R64_CS(%r15),%eax /* 64-bit user mode */
1507 #endif
1508 3:
1509 testb $3,%al /* user mode, */
1510 jnz ast_from_interrupt_user /* go handle potential ASTs */
1511 /*
1512 * we only want to handle preemption requests if
1513 * the interrupt fell in the kernel context
1514 * and preemption isn't disabled
1515 */
1516 movl %gs:CPU_PENDING_AST,%eax
1517 testl $(AST_URGENT),%eax /* any urgent requests? */
1518 je ret_to_kernel /* no, nothing to do */
1519
1520 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
1521 jne ret_to_kernel /* yes, skip it */
1522
1523 /*
1524 * Take an AST from kernel space. We don't need (and don't want)
1525 * to do as much as the case where the interrupt came from user
1526 * space.
1527 */
1528 CCALL(ast_taken_kernel)
1529
1530 mov %rsp, %r15 /* AST changes stack, saved state */
1531 jmp ret_to_kernel
1532
1533
1534 /*
1535 * nested int - simple path, can't preempt etc on way out
1536 */
1537 int_from_intstack:
1538 incl %gs:CPU_PREEMPTION_LEVEL
1539 incl %gs:CPU_INTERRUPT_LEVEL
1540 incl %gs:CPU_NESTED_ISTACK
1541
1542 push %gs:CPU_INT_STATE
1543 mov %r15, %gs:CPU_INT_STATE
1544
1545 CCALL1(interrupt, %r15)
1546
1547 pop %gs:CPU_INT_STATE
1548
1549 decl %gs:CPU_INTERRUPT_LEVEL
1550 decl %gs:CPU_PREEMPTION_LEVEL
1551 decl %gs:CPU_NESTED_ISTACK
1552
1553 jmp ret_to_kernel
1554
1555 /*
1556 * Take an AST from an interrupted user
1557 */
1558 ast_from_interrupt_user:
1559 movl %gs:CPU_PENDING_AST,%eax
1560 testl %eax,%eax /* pending ASTs? */
1561 je EXT(ret_to_user) /* no, nothing to do */
1562
1563 TIME_TRAP_UENTRY
1564
1565 movl $1, %ecx /* check if we're in the PFZ */
1566 jmp L_return_from_trap_with_ast /* return */
1567
1568
1569 /* Syscall dispatch routines! */
1570
1571 /*
1572 *
1573  * 32-bit Tasks
1574 * System call entries via INTR_GATE or sysenter:
1575 *
1576 * r15 x86_saved_state32_t
1577 * rsp kernel stack
1578 *
1579 * both rsp and r15 are 16-byte aligned
1580 * interrupts disabled
1581 * direction flag cleared
1582 */
1583
1584 Entry(hndl_sysenter)
1585 /*
1586 * We can be here either for a mach syscall or a unix syscall,
1587 * as indicated by the sign of the code:
1588 */
1589 movl R32_EAX(%r15),%eax
1590 testl %eax,%eax
1591 js EXT(hndl_mach_scall) /* < 0 => mach */
1592 /* > 0 => unix */
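	/*
	 * For example, a Mach trap such as mach_msg_trap (trap #31) is entered
	 * with %eax == -31, while BSD syscalls arrive with their positive numbers.
	 */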
1593
1594 Entry(hndl_unix_scall)
1595
1596 TIME_TRAP_UENTRY
1597
1598 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1599 movq TH_TASK(%rcx),%rbx /* point to current task */
1600 incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
1601
1602 /* Check for active vtimers in the current task */
1603 TASK_VTIMER_CHECK(%rbx,%rcx)
1604
1605 sti
1606
1607 CCALL1(unix_syscall, %r15)
1608 /*
1609 * always returns through thread_exception_return
1610 */
1611
1612
1613 Entry(hndl_mach_scall)
1614 TIME_TRAP_UENTRY
1615
1616 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1617 movq TH_TASK(%rcx),%rbx /* point to current task */
1618 incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
1619
1620 /* Check for active vtimers in the current task */
1621 TASK_VTIMER_CHECK(%rbx,%rcx)
1622
1623 sti
1624
1625 CCALL1(mach_call_munger, %r15)
1626 /*
1627 * always returns through thread_exception_return
1628 */
1629
1630
1631 Entry(hndl_mdep_scall)
1632 TIME_TRAP_UENTRY
1633
1634 /* Check for active vtimers in the current task */
1635 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1636 movq TH_TASK(%rcx),%rbx /* point to current task */
1637 TASK_VTIMER_CHECK(%rbx,%rcx)
1638
1639 sti
1640
1641 CCALL1(machdep_syscall, %r15)
1642 /*
1643 * always returns through thread_exception_return
1644 */
1645
1646 /*
1647  * 64-bit Tasks
1648 * System call entries via syscall only:
1649 *
1650 * r15 x86_saved_state64_t
1651 * rsp kernel stack
1652 *
1653 * both rsp and r15 are 16-byte aligned
1654 * interrupts disabled
1655 * direction flag cleared
1656 */
1657
1658 Entry(hndl_syscall)
1659 TIME_TRAP_UENTRY
1660
1661 movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
1662 movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling syscall */
1663 movq TH_TASK(%rcx),%rbx /* point to current task */
1664
1665 /* Check for active vtimers in the current task */
1666 TASK_VTIMER_CHECK(%rbx,%rcx)
1667
1668 /*
1669  * We can be here either for a mach, unix, machdep or diag syscall,
1670 * as indicated by the syscall class:
1671 */
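	/*
	 * A sketch, assuming the usual definitions in syscall_sw.h
	 * (SYSCALL_CLASS_SHIFT == 24, SYSCALL_CLASS_UNIX == 2): BSD write(2),
	 * syscall #4, arrives in %rax as 0x2000004, and the masked class bits
	 * select hndl_unix_scall64 below.
	 */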
1672 movl R64_RAX(%r15), %eax /* syscall number/class */
1673 movl %eax, %edx
1674 andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */
1675 cmpl $(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
1676 je EXT(hndl_mach_scall64)
1677 cmpl $(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
1678 je EXT(hndl_unix_scall64)
1679 cmpl $(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
1680 je EXT(hndl_mdep_scall64)
1681 cmpl $(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
1682 je EXT(hndl_diag_scall64)
1683
1684 /* Syscall class unknown */
1685 sti
1686 CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
1687 /* no return */
1688
1689
1690 Entry(hndl_unix_scall64)
1691 incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
1692 sti
1693
1694 CCALL1(unix_syscall64, %r15)
1695 /*
1696 * always returns through thread_exception_return
1697 */
1698
1699
1700 Entry(hndl_mach_scall64)
1701 incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
1702 sti
1703
1704 CCALL1(mach_call_munger64, %r15)
1705 /*
1706 * always returns through thread_exception_return
1707 */
1708
1709
1710
1711 Entry(hndl_mdep_scall64)
1712 sti
1713
1714 CCALL1(machdep_syscall64, %r15)
1715 /*
1716 * always returns through thread_exception_return
1717 */
1718
1719 Entry(hndl_diag_scall64)
1720 CCALL1(diagCall64, %r15) // Call diagnostics
1721 test %eax, %eax // What kind of return is this?
1722 je 1f // - branch if bad (zero)
1723 jmp EXT(return_to_user) // Normal return, do not check asts...
1724 1:
1725 sti
1726 CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
1727 /* no return */
1728 /* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
1729 Entry(hndl_machine_check)
1730 /* Adjust SP and savearea to their canonical, non-aliased addresses */
1731 CCALL1(panic_machine_check64, %r15)
1732 hlt
1733
1734 Entry(hndl_double_fault)
1735 CCALL1(panic_double_fault64, %r15)
1736 hlt