/*
 * Copyright (c) 2010-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/asm.h>
#include <assym.s>
#include <debug.h>
#include "dwarf_unwind.h"
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>

#if DEBUG
#define DEBUG_IDT64 1
#endif

/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 *		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *	interrupt	- asynchronous events typically from external devices
 *	trap		- synchronous events due to thread execution
 *	syscall		- synchronous system call request
 *	fatal		- fatal traps
 */
/*
 * Indices of handlers for each exception type.
 */
#define HNDL_ALLINTRS		0
#define HNDL_ALLTRAPS		1
#define HNDL_SYSENTER		2
#define HNDL_SYSCALL		3
#define HNDL_UNIX_SCALL		4
#define HNDL_MACH_SCALL		5
#define HNDL_MDEP_SCALL		6
#define HNDL_DOUBLE_FAULT	7
#define HNDL_MACHINE_CHECK	8


/* Begin double-mapped descriptor section */

.section __HIB, __desc
.globl EXT(idt64_hndl_table0)
EXT(idt64_hndl_table0):
/* 0x00 */	.quad EXT(ks_dispatch)
/* 0x08 */	.quad EXT(ks_64bit_return)
/* 0x10 */	.quad 0 /* Populated with CPU shadow displacement */
/* 0x18 */	.quad EXT(ks_32bit_return)
#define TBL0_OFF_DISP_USER_WITH_POPRAX	0x20
/* 0x20 */	.quad EXT(ks_dispatch_user_with_pop_rax)
#define TBL0_OFF_DISP_KERN_WITH_POPRAX	0x28
/* 0x28 */	.quad EXT(ks_dispatch_kernel_with_pop_rax)
#define TBL0_OFF_PTR_KERNEL_STACK_MASK	0x30
/* 0x30 */	.quad 0 /* &kernel_stack_mask */

EXT(idt64_hndl_table1):
	.quad EXT(hndl_allintrs)
	.quad EXT(hndl_alltraps)
	.quad EXT(hndl_sysenter)
	.quad EXT(hndl_syscall)
	.quad EXT(hndl_unix_scall)
	.quad EXT(hndl_mach_scall)
	.quad EXT(hndl_mdep_scall)
	.quad EXT(hndl_double_fault)
	.quad EXT(hndl_machine_check)
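/*
 * Note (illustrative, derived from the dispatch code later in this file):
 * the HNDL_* index pushed by each stub selects one of these entries;
 * L_common_dispatch performs the indirection via
 *	leaq	EXT(idt64_hndl_table1)(%rip), %rax
 *	jmp	*(%rax, %rdx, 8)
 * where %rdx holds the handler index recovered from the saved state.
 */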
.text


/* The wrapper for all non-special traps/interrupts */
/* Everything before the handler/trap-number pushes is just to output
 * the interrupt number to the postcode display
 */
#if DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f) \
	push %rax ;\
	POSTCODE2(0x6400+n) ;\
	pop %rax ;\
	pushq $(f) ;\
	pushq $(n) ;\
	jmp L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f) \
	pushq $(f) ;\
	pushq $(n) ;\
	jmp L_dispatch
#endif

/* A trap that comes with an error code already on the stack */
#define TRAP_ERR(n, f) \
	Entry(f) ;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A normal trap */
#define TRAP(n, f) \
	Entry(f) ;\
	pushq $0 ;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define USER_TRAP TRAP

/* An interrupt */
#define INTERRUPT(n) \
	Entry(_intr_ ## n) ;\
	pushq $0 ;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)

/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
#define TRAP_IST1(n, f)
#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)

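/*
 * Illustrative sketch: with the macros above, a TRAP(n, f) entry from
 * idt_table.h expands (non-DEBUG case) to roughly:
 *
 *	Entry(f)
 *	pushq	$0			error code placeholder
 *	pushq	$(HNDL_ALLTRAPS)	handler index into idt64_hndl_table1
 *	pushq	$(n)			trap number
 *	jmp	L_dispatch
 */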
/* Begin double-mapped text section */
.section __HIB, __text
/* Generate all the stubs */
#include "idt_table.h"

Entry(idt64_page_fault)
	pushq $(HNDL_ALLTRAPS)
#if !(DEVELOPMENT || DEBUG)
	pushq $(T_PAGE_FAULT)
	jmp L_dispatch
#else
	pushq $(T_PAGE_FAULT)

	pushq %rax
	pushq %rbx
	pushq %rcx
	testb $3, 8+8+8+ISF64_CS(%rsp) /* Coming from userspace? */
	jz L_pfkern /* No? (relatively uncommon), goto L_pfkern */

	/*
	 * We faulted from the user; if the fault address is at the user's %rip,
	 * abort trying to save the cacheline since that adds another page fault's
	 * overhead when we recover, below.
	 */
	movq 8+8+8+ISF64_RIP(%rsp), %rbx
	movq %cr2, %rcx
	cmpq %rbx, %rcx

	/* note that the next 3 instructions do not affect RFLAGS */
	swapgs
	leaq EXT(idt64_hndl_table0)(%rip), %rax
	mov 16(%rax), %rax /* Offset of per-CPU shadow */

	jne L_dispatch_from_user_with_rbx_rcx_pushes
	jmp abort_rip_cacheline_read

L_pfkern:
	/*
	 * Kernel page fault
	 * If the fault occurred while reading from the user's code cache line, abort the cache line read;
	 * otherwise, treat this as a regular kernel fault
	 */
	movq 8+8+8+ISF64_RIP(%rsp), %rbx
	leaq rip_cacheline_read(%rip), %rcx
	cmpq %rcx, %rbx
	jb regular_kernel_page_fault
	leaq rip_cacheline_read_end(%rip), %rcx
	cmpq %rcx, %rbx
	jbe L_pf_on_clread /* Did we hit a #PF within the cacheline read? */

regular_kernel_page_fault:
	/* No, regular kernel #PF */
	popq %rcx
	popq %rbx
	jmp L_dispatch_from_kernel_no_push_rax

L_pf_on_clread:
	/*
	 * We faulted while trying to read user instruction memory at the parent fault's %rip; abort that action by
	 * changing the return address on the stack, restoring cr2 to its previous value, peeling off the pushes we
	 * added on entry to the page fault handler, then performing an iretq
	 */
	popq %rcx
	movq %rcx, %cr2
	popq %rbx
	leaq abort_rip_cacheline_read(%rip), %rax
	movq %rax, 8+ISF64_RIP(%rsp)
	popq %rax
	addq $24, %rsp /* pop the 2 pushes + the error code */
	iretq /* Resume previous trap/fault processing */
#endif /* !(DEVELOPMENT || DEBUG) */

/*
 * #DB handler, which runs on IST1, will treat as spurious any #DB received while executing in the
 * kernel while not on the kernel's gsbase.
 */
Entry(idt64_debug)
	/* Synthesize common interrupt stack frame */
	push $0 /* error code */
	pushq $(HNDL_ALLTRAPS)
	pushq $(T_DEBUG)
	/* Spill prior to RDMSR */
	push %rax
	push %rcx
	push %rdx
	mov $(MSR_IA32_GS_BASE), %ecx
	rdmsr /* Check contents of GSBASE MSR */
	test $0x80000000, %edx /* MSB set? Already swapped to kernel's */
	jnz 1f

	/*
	 * If we're not already swapped to the kernel's gsbase AND this #DB originated from kernel space,
	 * it must have happened within the very small window on entry or exit before or after (respectively)
	 * swapgs occurred. In those cases, consider the #DB spurious and immediately return.
	 */
	testb $3, 8+8+8+ISF64_CS(%rsp)
	jnz 2f
	pop %rdx
	pop %rcx
	pop %rax
	addq $0x18, %rsp /* Remove synthesized interrupt stack frame */
	jmp EXT(ret64_iret)
2:
	swapgs /* direct from user */
1:
	pop %rdx

	leaq EXT(idt64_hndl_table0)(%rip), %rax
	mov 16(%rax), %rax /* Offset of per-CPU shadow */

	mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov %rax, %cr3

	pop %rcx

	/* Note that %rax will be popped from the stack in ks_dispatch, below */

	leaq EXT(idt64_hndl_table0)(%rip), %rax
	jmp *(%rax)

/*
 * Legacy interrupt gate System call handlers.
 * These are entered via a syscall interrupt. The system call number in %rax
 * is saved to the error code slot in the stack frame. We then branch to the
 * common state saving code.
 */

#ifndef UNIX_INT
#error NO UNIX INT!!!
#endif
Entry(idt64_unix_scall)
	pushq %rax /* save system call number */
	pushq $(HNDL_UNIX_SCALL)
	pushq $(UNIX_INT)
	jmp L_u64bit_entry_check

Entry(idt64_mach_scall)
	pushq %rax /* save system call number */
	pushq $(HNDL_MACH_SCALL)
	pushq $(MACH_INT)
	jmp L_u64bit_entry_check

Entry(idt64_mdep_scall)
	pushq %rax /* save system call number */
	pushq $(HNDL_MDEP_SCALL)
	pushq $(MACHDEP_INT)
	jmp L_u64bit_entry_check

/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack as we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_segnp)
	pushq $(HNDL_ALLTRAPS)
	pushq $(T_SEGMENT_NOT_PRESENT)
	jmp L_check_for_kern_flt

Entry(idt64_gen_prot)
	pushq $(HNDL_ALLTRAPS)
	pushq $(T_GENERAL_PROTECTION)
	jmp L_check_for_kern_flt

Entry(idt64_stack_fault)
	pushq $(HNDL_ALLTRAPS)
	pushq $(T_STACK_FAULT)
	jmp L_check_for_kern_flt

L_check_for_kern_flt:
	/*
	 * If we took a #GP or #SS from the kernel, check if we took them
	 * from either ret32_iret or ret64_iret. If we did, we need to
	 * jump into L_dispatch at the swapgs so that the code in L_dispatch
	 * can proceed with the correct GSbase.
	 */
	pushq %rax
	testb $3, 8+ISF64_CS(%rsp)
	jnz L_dispatch_from_user_no_push_rax /* Fault from user, go straight to dispatch */

	/* Check if the fault occurred in the 32-bit segment restoration window (which executes with user gsb) */
	leaq L_32bit_seg_restore_begin(%rip), %rax
	cmpq %rax, 8+ISF64_RIP(%rsp)
	jb L_not_32bit_segrestores
	leaq L_32bit_seg_restore_done(%rip), %rax
	cmpq %rax, 8+ISF64_RIP(%rsp)
	jae L_not_32bit_segrestores
	jmp 1f
L_not_32bit_segrestores:
	leaq EXT(ret32_iret)(%rip), %rax
	cmpq %rax, 8+ISF64_RIP(%rsp)
	je 1f
	leaq EXT(ret64_iret)(%rip), %rax
	cmpq %rax, 8+ISF64_RIP(%rsp)
	je 1f
	jmp L_dispatch_from_kernel_no_push_rax
	/*
	 * We hit the fault on iretq, so check the original return %cs. If
	 * it's a user %cs, fix up the stack and then jump to dispatch.
	 *
	 * With this type of fault, the stack is laid out as follows:
	 *
	 *
	 * orig %ss	saved_rsp+32
	 * orig %rsp	saved_rsp+24
	 * orig %rflags	saved_rsp+16
	 * orig %cs	saved_rsp+8
	 * orig %rip	saved_rsp
	 *		^^^^^^^^^ (maybe on another stack, since we switched to IST1)
	 * %ss		+64	-8
	 * saved_rsp	+56	-16
	 * %rflags	+48	-24
	 * %cs		+40	-32
	 * %rip		+32	-40
	 * error code	+24	-48
	 * handler	+16	-56
	 * trap number	+8	-64
	 * <saved %rax>	<== %rsp	-72
	 */
1:
	pushq %rbx
	movq 16+ISF64_RSP(%rsp), %rbx
	movq ISF64_CS-24(%rbx), %rax
	testb $3, %al /* If the original return destination was to user */
	jnz 2f
	popq %rbx
	jmp L_dispatch_from_kernel_no_push_rax /* Fault occurred when trying to return to kernel */
2:
	/*
	 * Fix the stack so the original trap frame is current, then jump to dispatch
	 */

	movq %rax, 16+ISF64_CS(%rsp)

	movq ISF64_RSP-24(%rbx), %rax
	movq %rax, 16+ISF64_RSP(%rsp)

	movq ISF64_RIP-24(%rbx), %rax
	movq %rax, 16+ISF64_RIP(%rsp)

	movq ISF64_SS-24(%rbx), %rax
	movq %rax, 16+ISF64_SS(%rsp)

	movq ISF64_RFLAGS-24(%rbx), %rax
	movq %rax, 16+ISF64_RFLAGS(%rsp)

	popq %rbx
	jmp L_dispatch_from_user_no_push_rax



/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	pushq $(HNDL_DOUBLE_FAULT)
	pushq $(T_DOUBLE_FAULT)
	jmp L_dispatch

Entry(idt64_db_task_stk_fault)
	pushq $(HNDL_DOUBLE_FAULT)
	pushq $(T_STACK_FAULT)
	jmp L_dispatch

Entry(idt64_mc)
	push $(0) /* Error */
	pushq $(HNDL_MACHINE_CHECK)
	pushq $(T_MACHINE_CHECK)
	jmp L_dispatch

/*
 * NMI
 * This may or may not be fatal but extreme care is required
 * because it may arrive while control is already in another trampoline.
 *
 * We get here on the IST2 stack, which is used exclusively for NMIs.
 * Machine checks, double faults and similar use IST1.
 */
Entry(idt64_nmi)
	push %rax
	push %rcx
	push %rdx
	testb $3, ISF64_CS(%rsp)
	jz 1f

	/* From user-space: copy interrupt state to user PCB */
	swapgs

	leaq EXT(idt64_hndl_table0)(%rip), %rax
	mov 16(%rax), %rax /* Offset of per-CPU shadow */
	mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov %rax, %cr3 /* note that SMAP is enabled in L_common_dispatch (on Broadwell+) */

	mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */
	add $(ISF64_SIZE), %rcx /* adjust to base of ISF */

	leaq TBL0_OFF_DISP_USER_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax /* ks_dispatch_user_with_pop_rax */
	jmp 4f /* Copy state to PCB */

1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS is set.
	 * Sets the high 32 bits of the return CS to 1 to ensure that we'll swapgs back correctly at IRET.
	 */
	mov $(MSR_IA32_GS_BASE), %ecx
	rdmsr /* read kernel gsbase */
	test $0x80000000, %edx /* test MSB of address */
	jnz 2f
	swapgs /* so swap */
	movl $1, ISF64_CS+4(%rsp) /* and set flag in CS slot */
2:

	leaq EXT(idt64_hndl_table0)(%rip), %rax
	mov 16(%rax), %rax /* Offset of per-CPU shadow */
	mov %cr3, %rdx
	mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov %rax, %cr3 /* Unconditionally switch to primary kernel pagetables */

	/*
	 * Determine whether we're on the kernel or interrupt stack
	 * when the NMI hit.
	 */
	mov ISF64_RSP(%rsp), %rcx
	mov %gs:CPU_KERNEL_STACK, %rax
	xor %rcx, %rax
	movq TBL0_OFF_PTR_KERNEL_STACK_MASK+EXT(idt64_hndl_table0)(%rip), %rdx
	mov (%rdx), %rdx /* Load kernel_stack_mask */
	and %rdx, %rax
	test %rax, %rax /* are we on the kernel stack? */
	jz 3f /* yes */

	mov %gs:CPU_INT_STACK_TOP, %rax
	cmp %rcx, %rax /* are we on the interrupt stack? */
	jb 5f /* no */
	leaq -INTSTACK_SIZE(%rax), %rax
	cmp %rcx, %rax
	jb 3f /* yes */
5:
	mov %gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and $0xFFFFFFFFFFFFFFF0, %rcx

	leaq TBL0_OFF_DISP_KERN_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax /* ks_dispatch_kernel_with_pop_rax */
4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 *	ISF64_ERR(RSP)    saved RAX
	 *	ISF64_TRAPFN(RSP) saved RCX
	 *	ISF64_TRAPNO(RSP) saved RDX
	 */
	xchg %rsp, %rcx /* set for pushes */
	push ISF64_SS(%rcx)
	push ISF64_RSP(%rcx)
	push ISF64_RFLAGS(%rcx)
	push ISF64_CS(%rcx)
	push ISF64_RIP(%rcx)
	/* Synthesize common interrupt stack frame */
	push $(0) /* error code 0 */
	push $(HNDL_ALLINTRS) /* trapfn allintrs */
	push $(T_NMI) /* trapno T_NMI */
	push ISF64_ERR(%rcx) /* saved %rax is popped in ks_dispatch_{kernel|user}_with_pop_rax */
	mov ISF64_TRAPNO(%rcx), %rdx
	mov ISF64_TRAPFN(%rcx), %rcx

	jmp *(%rax) /* ks_dispatch_{kernel|user}_with_pop_rax */

Entry(idt64_double_fault)
	pushq $(HNDL_DOUBLE_FAULT)
	pushq $(T_DOUBLE_FAULT)
	jmp L_dispatch

Entry(hi64_syscall)
Entry(idt64_syscall)
	swapgs
	/* Use RAX as a temporary by shifting its contents into R11[32:63].
	 * The system call number is defined to be a 32-bit quantity, as is
	 * RFLAGS.
	 */
	shlq $32, %rax
	or %rax, %r11
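	/*
	 * Illustrative note: %r11 now packs bits 63..32 = syscall number
	 * (from %eax) and bits 31..0 = user RFLAGS; both halves are unpacked
	 * below while the ISF frame is synthesized on the exception stack.
	 */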
.globl EXT(dblsyscall_patch_point)
EXT(dblsyscall_patch_point):
//	movabsq $0x12345678ABCDEFFFULL, %rax
	/* Generate offset to the double-mapped per-CPU data shadow
	 * into RAX
	 */
	leaq EXT(idt64_hndl_table0)(%rip), %rax
	mov 16(%rax), %rax
	mov %rsp, %gs:CPU_UBER_TMP(%rax) /* save user stack */
	mov %gs:CPU_ESTACK(%rax), %rsp /* switch stack to per-cpu estack */
	sub $(ISF64_SIZE), %rsp

	/*
	 * Synthesize an ISF frame on the exception stack
	 */
	movl $(USER_DS), ISF64_SS(%rsp)
	mov %rcx, ISF64_RIP(%rsp) /* rip */

	mov %gs:CPU_UBER_TMP(%rax), %rcx
	mov %rcx, ISF64_RSP(%rsp) /* user stack --changed */

	mov %r11, %rax
	shrq $32, %rax /* Restore RAX */
	mov %r11d, %r11d /* Clear r11[32:63] */

	mov %r11, ISF64_RFLAGS(%rsp) /* rflags */
	movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */
	mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */
	movq $(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
	movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */
	swapgs
	jmp L_dispatch /* this can only be 64-bit */

Entry(hi64_sysenter)
Entry(idt64_sysenter)
	/* Synthesize an interrupt stack frame onto the
	 * exception stack.
	 */
	push $(USER_DS) /* ss */
	push %rcx /* uesp */
	pushf /* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flags bit;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push $0
	popf
	push $(SYSENTER_CS) /* cs */
L_sysenter_continue:
	push %rdx /* eip */
	push %rax /* err/eax - syscall code */
	pushq $(HNDL_SYSENTER)
	pushq $(T_SYSENTER)
	orl $(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp L_u64bit_entry_check

#if DEVELOPMENT || DEBUG
do_cacheline_stash:
	/*
	 * Copy the cache line that includes the user's EIP/RIP into the shadow cpu structure
	 * for later extraction/sanity-checking in user_trap().
	 */

	pushq %rbx
	pushq %rcx
L_dispatch_from_user_with_rbx_rcx_pushes:
	movq 8+8+8+ISF64_RIP(%rsp), %rbx
	andq $-64, %rbx /* Round address to cacheline boundary */
	pushf
	/*
	 * disable SMAP, if it's enabled (note that CLAC is present in BDW and later only, so we're
	 * using generic instructions here without checking whether the CPU supports SMAP first)
	 */
	orq $(1 << 18), (%rsp)
	popf
	/*
	 * Note that we only check for a faulting read on the first read, since if the first read
	 * succeeds, the rest of the cache line should also be readable since we are running with
	 * interrupts disabled here and a TLB invalidation cannot sneak in and pull the rug out.
	 */
	movq %cr2, %rcx /* stash the original %cr2 in case the first cacheline read triggers a #PF */
	/* This value of %cr2 is restored in the page fault handler if it detects */
	/* that the fault occurred on the next instruction, so the original #PF can */
	/* continue to be handled without issue. */
rip_cacheline_read:
	mov (%rbx), %rcx
	/* Note that CPU_RTIMES in the shadow cpu struct was just a convenient place to stash the cacheline */
	mov %rcx, %gs:CPU_RTIMES(%rax)
	movq %cr2, %rcx
	mov 8(%rbx), %rcx
	mov %rcx, %gs:8+CPU_RTIMES(%rax)
	movq %cr2, %rcx
	mov 16(%rbx), %rcx
	mov %rcx, %gs:16+CPU_RTIMES(%rax)
	movq %cr2, %rcx
	mov 24(%rbx), %rcx
	mov %rcx, %gs:24+CPU_RTIMES(%rax)
	movq %cr2, %rcx
	mov 32(%rbx), %rcx
	mov %rcx, %gs:32+CPU_RTIMES(%rax)
	movq %cr2, %rcx
	mov 40(%rbx), %rcx
	mov %rcx, %gs:40+CPU_RTIMES(%rax)
	movq %cr2, %rcx
	mov 48(%rbx), %rcx
	mov %rcx, %gs:48+CPU_RTIMES(%rax)
	movq %cr2, %rcx
rip_cacheline_read_end:
	mov 56(%rbx), %rcx
	mov %rcx, %gs:56+CPU_RTIMES(%rax)

	pushf
	andq $~(1 << 18), (%rsp) /* reenable SMAP */
	popf

	jmp cacheline_read_cleanup_stack

abort_rip_cacheline_read:
	pushf
	andq $~(1 << 18), (%rsp) /* reenable SMAP */
	popf
abort_rip_cacheline_read_no_smap_reenable:
	movl $0xdeadc0de, %ecx /* Write a sentinel so higher-level code knows this was aborted */
	shlq $32, %rcx
	movl $0xbeefcafe, %ebx
	orq %rbx, %rcx
	movq %rcx, %gs:CPU_RTIMES(%rax)
	movq %rcx, %gs:8+CPU_RTIMES(%rax)

cacheline_read_cleanup_stack:
	popq %rcx
	popq %rbx
	jmp L_dispatch_kgsb
#endif /* if DEVELOPMENT || DEBUG */

/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here with:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */

L_dispatch:
	pushq %rax
	testb $3, 8+ISF64_CS(%rsp)
	jz 1f
L_dispatch_from_user_no_push_rax:
	swapgs
	leaq EXT(idt64_hndl_table0)(%rip), %rax
	mov 16(%rax), %rax /* Offset of per-CPU shadow */

#if DEVELOPMENT || DEBUG
	/* Stash the cacheline for #UD, #PF, and #GP */
	cmpl $(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	je do_cacheline_stash
	cmpl $(T_PAGE_FAULT), 8+ISF64_TRAPNO(%rsp)
	je do_cacheline_stash
	cmpl $(T_GENERAL_PROTECTION), 8+ISF64_TRAPNO(%rsp)
	je do_cacheline_stash
#endif

L_dispatch_kgsb:
	mov %gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov %rax, %cr3
#if DEBUG
	mov %rax, %gs:CPU_ENTRY_CR3
#endif
L_dispatch_from_kernel_no_push_rax:
1:
	leaq EXT(idt64_hndl_table0)(%rip), %rax
	/* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
	/* Indirect branch to non-doublemapped trampolines */
	jmp *(%rax)
/* User return: register restoration and address space switch sequence */
Entry(ks_64bit_return)

	mov R64_R14(%r15), %r14
	mov R64_R13(%r15), %r13
	mov R64_R12(%r15), %r12
	mov R64_R11(%r15), %r11
	mov R64_R10(%r15), %r10
	mov R64_R9(%r15), %r9
	mov R64_R8(%r15), %r8
	mov R64_RSI(%r15), %rsi
	mov R64_RDI(%r15), %rdi
	mov R64_RBP(%r15), %rbp
	mov R64_RDX(%r15), %rdx
	mov R64_RCX(%r15), %rcx
	mov R64_RBX(%r15), %rbx
	mov R64_RAX(%r15), %rax
	/* Switch to per-CPU exception stack */
	mov %gs:CPU_ESTACK, %rsp

	/* Synthesize interrupt stack frame from PCB savearea to exception stack */
	push R64_SS(%r15)
	push R64_RSP(%r15)
	push R64_RFLAGS(%r15)
	push R64_CS(%r15)
	push R64_RIP(%r15)

	cmpq $(KERNEL64_CS), 8(%rsp)
	jne 1f /* Returning to user (%r15 will be restored after the segment checks) */
	mov R64_R15(%r15), %r15
	jmp L_64b_kernel_return /* Returning to kernel */

1:
	push %rax /* [A] */
	movl %gs:CPU_NEED_SEGCHK, %eax
	push %rax /* [B] */

	/* Returning to user */
	cmpl $0, %gs:CPU_CURTASK_HAS_LDT /* If the current task has an LDT, check and restore segment regs */
	jne L_64b_segops_island

	/*
	 * Restore %r15, since we're now done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 load anyway.
	 * Note that %r15 is restored below for the segment-restore
	 * case, just after we no longer need to access register state
	 * relative to %r15.
	 */
	mov R64_R15(%r15), %r15

	/*
	 * Note that this %cr3 sequence is duplicated here to save
	 * [at least] a load and comparison that would be required if
	 * this block were shared.
	 */
	/* Discover user cr3/ASID */
	mov %gs:CPU_UCR3, %rax
#if DEBUG
	mov %rax, %gs:CPU_EXIT_CR3
#endif
	mov %rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	swapgs

L_chk_sysret:
	pop %rax /* Matched to [B], above (segchk required) */

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * | Return SS    | +40
	 * | Return RSP   | +32
	 * | Return RFL   | +24
	 * | Return CS    | +16
	 * | Return RIP   | +8
	 * | Saved RAX    | <-- rsp
	 * +--------------+
	 */
	cmpl $(SYSCALL_CS), 16(%rsp) /* test for exit via SYSRET */
	je L_sysret

	cmpl $1, %eax
	je L_verw_island_2

	pop %rax /* Matched to [A], above */

L_64b_kernel_return:
.globl EXT(ret64_iret)
EXT(ret64_iret):
	iretq /* return from interrupt */


L_sysret:
	cmpl $1, %eax
	je L_verw_island_3

	pop %rax /* Matched to [A], above */
	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 *	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop %rcx
	add $8, %rsp
	pop %r11
	pop %rsp
	sysretq /* return from system call */


L_verw_island_2:

	pop %rax /* Matched to [A], above */
	verw 40(%rsp) /* verw operates on the %ss value already on the stack */
	jmp EXT(ret64_iret)


L_verw_island_3:

	pop %rax /* Matched to [A], above */

	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 *	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop %rcx
	add $8, %rsp
	pop %r11
	verw 8(%rsp) /* verw operates on the %ss value already on the stack */
	pop %rsp
	sysretq /* return from system call */


L_64b_segops_island:

	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER64_CS and 0 */
	cmpl $(USER64_CS), R64_CS(%r15)
	jz 11f
	larw R64_CS(%r15), %ax
	jnz L_64_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw $0x800, %ax
	jz L_64_reset_cs /* Update stored %cs with known-good selector if ZF == 1 */
	jmp 11f
L_64_reset_cs:
	movl $(USER64_CS), R64_CS(%r15)
11:
	cmpl $0, R64_DS(%r15)
	jz 22f
	larw R64_DS(%r15), %ax
	jz 22f
	movl $0, R64_DS(%r15)
22:
	cmpl $0, R64_ES(%r15)
	jz 33f
	larw R64_ES(%r15), %ax
	jz 33f
	movl $0, R64_ES(%r15)
33:
	cmpl $0, R64_FS(%r15)
	jz 44f
	larw R64_FS(%r15), %ax
	jz 44f
	movl $0, R64_FS(%r15)
44:
	cmpl $0, R64_GS(%r15)
	jz 55f
	larw R64_GS(%r15), %ax
	jz 55f
	movl $0, R64_GS(%r15)
55:
	/*
	 * Pack the segment registers in %rax since (%r15) will not
	 * be accessible after the %cr3 switch.
	 * Only restore %gs if cthread_self is zero (indicate
	 * this to the code below with a value of 0xffff)
	 */
	mov %gs:CPU_ACTIVE_THREAD, %rax /* Get the active thread */
	cmpq $0, TH_CTH_SELF(%rax)
	je L_restore_gs
	movw $0xFFFF, %ax
	jmp 1f
L_restore_gs:
	movw R64_GS(%r15), %ax
1:
	shlq $16, %rax
	movw R64_FS(%r15), %ax
	shlq $16, %rax
	movw R64_ES(%r15), %ax
	shlq $16, %rax
	movw R64_DS(%r15), %ax
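	/*
	 * Illustrative note: %rax now packs the selectors as bits 63..48 = %gs
	 * (or the 0xFFFF sentinel), 47..32 = %fs, 31..16 = %es, 15..0 = %ds;
	 * they are unpacked in reverse order by the movw/shrq sequence after
	 * the %cr3 switch below.
	 */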

	/*
	 * Restore %r15, since we're done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 switch.
	 */
	mov R64_R15(%r15), %r15

	/* Discover user cr3/ASID */
	push %rax
	mov %gs:CPU_UCR3, %rax
#if DEBUG
	mov %rax, %gs:CPU_EXIT_CR3
#endif
	mov %rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	pop %rax
	swapgs

	/*
	 * Returning to user; restore segment registers that might be used
	 * by compatibility-mode code in a 64-bit user process.
	 *
	 * Note that if we take a fault here, it's OK that we haven't yet
	 * popped %rax from the stack, because %rsp will be reset to
	 * the value pushed onto the exception stack (above).
	 */
	movw %ax, %ds
	shrq $16, %rax

	movw %ax, %es
	shrq $16, %rax

	movw %ax, %fs
	shrq $16, %rax

	/*
	 * 0xFFFF is the sentinel set above that indicates we should
	 * not restore %gs (because GS.base was already set elsewhere
	 * (e.g.: in act_machine_set_pcb or machine_thread_set_tsd_base))
	 */
	cmpw $0xFFFF, %ax
	je L_chk_sysret
	movw %ax, %gs /* Restore %gs to user-set value */
	jmp L_chk_sysret


L_u64bit_entry_check:
	/*
	 * Check we're not a confused 64-bit user.
	 */
	pushq %rax
	swapgs
	leaq EXT(idt64_hndl_table0)(%rip), %rax
	mov 16(%rax), %rax

	cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP(%rax)
	jne L_64bit_entry_reject
	jmp L_dispatch_kgsb

L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	movq $(HNDL_ALLTRAPS), 8+ISF64_TRAPFN(%rsp)
	movq $(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	jmp L_dispatch_kgsb

Entry(ks_32bit_return)

	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER_CS, USER_DS and 0 */
	cmpl $(USER_CS), R32_CS(%r15)
	jz 11f
	larw R32_CS(%r15), %ax
	jnz L_32_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw $0x800, %ax
	jz L_32_reset_cs /* Update stored %cs with known-good selector if ZF == 1 */
	jmp 11f
L_32_reset_cs:
	movl $(USER_CS), R32_CS(%r15)
11:
	cmpl $(USER_DS), R32_DS(%r15)
	jz 22f
	cmpl $0, R32_DS(%r15)
	jz 22f
	larw R32_DS(%r15), %ax
	jz 22f
	movl $(USER_DS), R32_DS(%r15)
22:
	cmpl $(USER_DS), R32_ES(%r15)
	jz 33f
	cmpl $0, R32_ES(%r15)
	jz 33f
	larw R32_ES(%r15), %ax
	jz 33f
	movl $(USER_DS), R32_ES(%r15)
33:
	cmpl $(USER_DS), R32_FS(%r15)
	jz 44f
	cmpl $0, R32_FS(%r15)
	jz 44f
	larw R32_FS(%r15), %ax
	jz 44f
	movl $(USER_DS), R32_FS(%r15)
44:
	cmpl $(USER_CTHREAD), R32_GS(%r15)
	jz 55f
	cmpl $0, R32_GS(%r15)
	jz 55f
	larw R32_GS(%r15), %ax
	jz 55f
	movl $(USER_CTHREAD), R32_GS(%r15)
55:

	/*
	 * Restore general 32-bit registers
	 */
	movl R32_EAX(%r15), %eax
	movl R32_EBX(%r15), %ebx
	movl R32_ECX(%r15), %ecx
	movl R32_EDX(%r15), %edx
	movl R32_EBP(%r15), %ebp
	movl R32_ESI(%r15), %esi
	movl R32_EDI(%r15), %edi
	movl R32_DS(%r15), %r8d
	movl R32_ES(%r15), %r9d
	movl R32_FS(%r15), %r10d
	movl R32_GS(%r15), %r11d

	/* Switch to the per-cpu (doublemapped) exception stack */
	mov %gs:CPU_ESTACK, %rsp

	/* Now transfer the ISF to the exception stack in preparation for iret, below */
	movl R32_SS(%r15), %r12d
	push %r12
	movl R32_UESP(%r15), %r12d
	push %r12
	movl R32_EFLAGS(%r15), %r12d
	push %r12
	movl R32_CS(%r15), %r12d
	push %r12
	movl R32_EIP(%r15), %r12d
	push %r12

	movl %gs:CPU_NEED_SEGCHK, %r14d /* %r14 will be zeroed just before we return */

	/*
	 * Finally, switch to the user pagetables. After this, all %gs-relative
	 * accesses MUST be to cpu shadow data ONLY. Note that after we restore %gs
	 * (after the swapgs), no %gs-relative accesses should be performed.
	 */
	/* Discover user cr3/ASID */
	mov %gs:CPU_UCR3, %r13
#if DEBUG
	mov %r13, %gs:CPU_EXIT_CR3
#endif
	mov %r13, %cr3

	swapgs

	/*
	 * Restore segment registers. A #GP taken here will push state onto IST1,
	 * not the exception stack. Note that the placement of the labels here
	 * corresponds to the fault address-detection logic (so do not change them
	 * without also changing that code).
	 */
L_32bit_seg_restore_begin:
	mov %r8, %ds
	mov %r9, %es
	mov %r10, %fs
	mov %r11, %gs
L_32bit_seg_restore_done:

	/* Zero 64-bit-exclusive GPRs to prevent data leaks */
	xor %r8, %r8
	xor %r9, %r9
	xor %r10, %r10
	xor %r11, %r11
	xor %r12, %r12
	xor %r13, %r13
	xor %r15, %r15

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * | Return SS    | +32
	 * | Return RSP   | +24
	 * | Return RFL   | +16
	 * | Return CS    | +8
	 * | Return RIP   | <-- rsp
	 * +--------------+
	 */

	cmpl $(SYSENTER_CS), 8(%rsp)
	/* test for sysexit */
	je L_rtu_via_sysexit

	cmpl $1, %r14d
	je L_verw_island

L_after_verw:
	xor %r14, %r14

.globl EXT(ret32_iret)
EXT(ret32_iret):
	iretq /* return from interrupt */

L_verw_island:
	verw 32(%rsp)
	jmp L_after_verw

L_verw_island_1:
	verw 16(%rsp)
	jmp L_after_verw_1

L_rtu_via_sysexit:
	pop %rdx /* user return eip */
	pop %rcx /* pop and toss cs */
	andl $(~EFL_IF), (%rsp) /* clear interrupts enable, sti below */

	/*
	 * %ss is now at 16(%rsp)
	 */
	cmpl $1, %r14d
	je L_verw_island_1
L_after_verw_1:
	xor %r14, %r14

	popf /* flags - carry denotes failure */
	pop %rcx /* user return esp */


	sti /* interrupts enabled after sysexit */
	sysexitl /* 32-bit sysexit */

/* End of double-mapped TEXT */
.text

Entry(ks_dispatch)
	popq %rax
	cmpl $(KERNEL64_CS), ISF64_CS(%rsp)
	je EXT(ks_dispatch_kernel)

	mov %rax, %gs:CPU_UBER_TMP
	mov %gs:CPU_UBER_ISF, %rax
	add $(ISF64_SIZE), %rax

	xchg %rsp, %rax
/* Memory to memory moves (ain't x86 wonderful):
 * Transfer the exception frame from the per-CPU exception stack to the
 * 'PCB' stack programmed at cswitch.
 */
	push ISF64_SS(%rax)
	push ISF64_RSP(%rax)
	push ISF64_RFLAGS(%rax)
	push ISF64_CS(%rax)
	push ISF64_RIP(%rax)
	push ISF64_ERR(%rax)
	push ISF64_TRAPFN(%rax)
	push ISF64_TRAPNO(%rax)
	mov %gs:CPU_UBER_TMP, %rax
	jmp EXT(ks_dispatch_user)

Entry(ks_dispatch_user_with_pop_rax)
	pop %rax
	jmp EXT(ks_dispatch_user)

Entry(ks_dispatch_user)
	cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je L_dispatch_U32 /* 32-bit user task */

L_dispatch_U64:
	subq $(ISS64_OFFSET), %rsp
	mov %r15, R64_R15(%rsp)
	mov %rsp, %r15
	mov %gs:CPU_KERNEL_STACK, %rsp
	jmp L_dispatch_64bit

Entry(ks_dispatch_kernel_with_pop_rax)
	pop %rax
	jmp EXT(ks_dispatch_kernel)

Entry(ks_dispatch_kernel)
	subq $(ISS64_OFFSET), %rsp
	mov %r15, R64_R15(%rsp)
	mov %rsp, %r15

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl $(SS_64), SS_FLAVOR(%r15)

	/*
	 * Save segment regs if a 64-bit task has
	 * installed customized segments in the LDT
	 */
	cmpl $0, %gs:CPU_CURTASK_HAS_LDT
	je L_skip_save_extra_segregs

	mov %ds, R64_DS(%r15)
	mov %es, R64_ES(%r15)

L_skip_save_extra_segregs:
	mov %fs, R64_FS(%r15)
	mov %gs, R64_GS(%r15)


	/* Save general-purpose registers */
	mov %rax, R64_RAX(%r15)
	mov %rbx, R64_RBX(%r15)
	mov %rcx, R64_RCX(%r15)
	mov %rdx, R64_RDX(%r15)
	mov %rbp, R64_RBP(%r15)
	mov %rdi, R64_RDI(%r15)
	mov %rsi, R64_RSI(%r15)
	mov %r8, R64_R8(%r15)
	mov %r9, R64_R9(%r15)
	mov %r10, R64_R10(%r15)
	mov %r11, R64_R11(%r15)
	mov %r12, R64_R12(%r15)
	mov %r13, R64_R13(%r15)
	mov %r14, R64_R14(%r15)

	/* Zero unused GPRs. BX/DX/SI are clobbered elsewhere across the exception handler, and are skipped. */
	xor %ecx, %ecx
	xor %edi, %edi
	xor %r8, %r8
	xor %r9, %r9
	xor %r10, %r10
	xor %r11, %r11
	xor %r12, %r12
	xor %r13, %r13
	xor %r14, %r14

	/* cr2 is significant only for page-faults */
	mov %cr2, %rax
	mov %rax, R64_CR2(%r15)

L_dispatch_U64_after_fault:
	mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
	mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */
	mov R64_CS(%r15), %esi /* %esi := cs for later */

	jmp L_common_dispatch

L_dispatch_U32: /* 32-bit user task */
	subq $(ISS64_OFFSET), %rsp
	mov %rsp, %r15
	mov %gs:CPU_KERNEL_STACK, %rsp
	movl $(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	mov %ds, R32_DS(%r15)
	mov %es, R32_ES(%r15)
	mov %fs, R32_FS(%r15)
	mov %gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov %eax, R32_EAX(%r15)
	mov %ebx, R32_EBX(%r15)
	mov %ecx, R32_ECX(%r15)
	mov %edx, R32_EDX(%r15)
	mov %ebp, R32_EBP(%r15)
	mov %esi, R32_ESI(%r15)
	mov %edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	mov %cr2, %rax
	mov %eax, R32_CR2(%r15)
	/* Zero unused GPRs. BX/DX/SI/R15 are clobbered elsewhere across the exception handler, and are skipped. */
	xor %ecx, %ecx
	xor %edi, %edi
	xor %r8, %r8
	xor %r9, %r9
	xor %r10, %r10
	xor %r11, %r11
	xor %r12, %r12
	xor %r13, %r13
	xor %r14, %r14

	/*
	 * Copy registers already saved in the machine state
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov R64_RIP(%r15), %eax
	mov %eax, R32_EIP(%r15)
	mov R64_RFLAGS(%r15), %eax
	mov %eax, R32_EFLAGS(%r15)
	mov R64_RSP(%r15), %eax
	mov %eax, R32_UESP(%r15)
	mov R64_SS(%r15), %eax
	mov %eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov R64_CS(%r15), %esi /* %esi := %cs for later */
	mov %esi, R32_CS(%r15)
	mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */
	mov %ebx, R32_TRAPNO(%r15)
	mov R64_ERR(%r15), %eax
	mov %eax, R32_ERR(%r15)
	mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */

L_common_dispatch:
	cld /* Ensure the direction flag is clear in the kernel */
	cmpl $0, EXT(pmap_smap_enabled)(%rip)
	je 1f
	clac /* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * We mark the kernel's cr3 as "active" for TLB coherency evaluation
	 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
	 * we switch to the kernel's address space on entry. Also,
	 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov %gs:CPU_KERNEL_CR3, %rcx
	mov %rcx, %gs:CPU_ACTIVE_CR3
	test $3, %esi /* CS: user/kernel? */
	jz 2f /* skip CR3 reload if from kernel */
	xor %ebp, %ebp
	cmpl $0, %gs:CPU_PAGEZERO_MAPPED
	jnz 11f
	cmpl $0, EXT(no_shared_cr3)(%rip)
	je 2f
11:
	xor %eax, %eax
	movw %gs:CPU_KERNEL_PCID, %ax
	or %rax, %rcx
	mov %rcx, %cr3 /* load kernel cr3 */
	jmp 4f
2:
	/* Deferred processing of pending kernel address space TLB invalidations */
	mov %gs:CPU_ACTIVE_CR3+4, %rcx
	shr $32, %rcx
	testl %ecx, %ecx
	jz 4f
	movl $0, %gs:CPU_TLB_INVALID
	cmpb $0, EXT(invpcid_enabled)(%rip)
	jz L_cr4_island
	movl $2, %ecx
	invpcid %gs:CPU_IP_DESC, %rcx
4:
L_set_act:
	mov %gs:CPU_ACTIVE_THREAD, %rcx /* Get the active thread */
	testq %rcx, %rcx
	je L_intcnt
	movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap */
	cmpq $0, TH_PCB_IDS(%rcx) /* Is there a debug register state? */
	jnz L_dr7_island
L_intcnt:
	incl %gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count
	/* Dispatch the designated handler */
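	/*
	 * Illustrative note: if %rsp/%r15 still point into the double-mapped
	 * alias window [dblmap_base, dblmap_max), slide both down by
	 * dblmap_dist so the handlers below see the primary kernel mapping.
	 */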
	cmp EXT(dblmap_base)(%rip), %rsp
	jb 66f
	cmp EXT(dblmap_max)(%rip), %rsp
	jge 66f
	subq EXT(dblmap_dist)(%rip), %rsp
	subq EXT(dblmap_dist)(%rip), %r15
66:
	leaq EXT(idt64_hndl_table1)(%rip), %rax
	jmp *(%rax, %rdx, 8)

L_cr4_island:
	mov %cr4, %rcx /* RMWW CR4, for lack of an alternative */
	and $(~CR4_PGE), %rcx
	mov %rcx, %cr4
	or $(CR4_PGE), %rcx
	mov %rcx, %cr4
	jmp L_set_act
L_dr7_island:
	xor %ecx, %ecx /* If so, reset DR7 (the control) */
	mov %rcx, %dr7
	jmp L_intcnt
/*
 * Control is passed here to return to user.
 */
Entry(return_to_user)
	TIME_TRAP_UEXIT

Entry(ret_to_user)
	mov %gs:CPU_ACTIVE_THREAD, %rdx
	cmpq $0, TH_PCB_IDS(%rdx) /* Is there a debug register context? */
	jnz L_dr_restore_island
L_post_dr_restore:
	/*
	 * We now mark the task's address space as active for TLB coherency.
	 * Handle special cases such as pagezero-less tasks here.
	 */
	mov %gs:CPU_TASK_CR3, %rcx
	mov %rcx, %gs:CPU_ACTIVE_CR3
	cmpl $0, %gs:CPU_PAGEZERO_MAPPED
	jnz L_cr3_switch_island
	movl EXT(no_shared_cr3)(%rip), %eax
	test %eax, %eax /* -no_shared_cr3 */
	jnz L_cr3_switch_island

L_cr3_switch_return:
	mov %gs:CPU_DR7, %rax /* Is there a debug control register? */
	cmp $0, %rax
	je 4f
	mov %rax, %dr7 /* Set DR7 */
	movq $0, %gs:CPU_DR7
4:
	cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
	jne L_32bit_return

	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq EXT(idt64_hndl_table0)(%rip), %rax
	jmp *8(%rax)


L_32bit_return:
#if DEBUG_IDT64
	cmpl $(SS_32), SS_FLAVOR(%r15) /* 32-bit state? */
	je 1f
	cli
	POSTCODE2(0x6432)
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */

	leaq EXT(idt64_hndl_table0)(%rip), %rax
	jmp *0x18(%rax)


L_dr_restore_island:
	movq TH_PCB_IDS(%rdx),%rax /* Obtain this thread's debug state */
	cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
	jne 1f
	movl DS_DR0(%rax), %ecx /* If so, load the 32 bit DRs */
	movq %rcx, %dr0
	movl DS_DR1(%rax), %ecx
	movq %rcx, %dr1
	movl DS_DR2(%rax), %ecx
	movq %rcx, %dr2
	movl DS_DR3(%rax), %ecx
	movq %rcx, %dr3
	movl DS_DR7(%rax), %ecx
	movq %rcx, %gs:CPU_DR7
	jmp 2f
1:
	mov DS64_DR0(%rax), %rcx /* Load the full width DRs */
	mov %rcx, %dr0
	mov DS64_DR1(%rax), %rcx
	mov %rcx, %dr1
	mov DS64_DR2(%rax), %rcx
	mov %rcx, %dr2
	mov DS64_DR3(%rax), %rcx
	mov %rcx, %dr3
	mov DS64_DR7(%rax), %rcx
	mov %rcx, %gs:CPU_DR7
2:
	jmp L_post_dr_restore
L_cr3_switch_island:
	xor %eax, %eax
	movw %gs:CPU_ACTIVE_PCID, %ax
	or %rax, %rcx
	mov %rcx, %cr3
	jmp L_cr3_switch_return

ret_to_kernel:
#if DEBUG_IDT64
	cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */
	je 1f
	cli
	POSTCODE2(0x6464)
	CCALL1(panic_idt64, %r15)
	hlt
1:
	cmpl $(KERNEL64_CS), R64_CS(%r15)
	je 2f
	CCALL1(panic_idt64, %r15)
	hlt
2:
#endif
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq EXT(idt64_hndl_table0)(%rip), %rax
	jmp *8(%rax)

/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 *
 * The rest of the state is set up as:
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov %esi, %eax
	testb $3, %al
	jz trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov %gs:CPU_ACTIVE_THREAD, %rcx
	movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap/exception */
	mov TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15) /* call user trap routine */
	/* user_trap() unmasks interrupts */
	cli /* hold off intrs - critical section */
	xorl %ecx, %ecx /* don't check if we're in the PFZ */


Entry(return_from_trap)
	movq %gs:CPU_ACTIVE_THREAD,%r15 /* Get current thread */
	movl $-1, TH_IOTIER_OVERRIDE(%r15) /* Reset IO tier override to -1 before returning to userspace */

	cmpl $0, TH_RWLOCK_COUNT(%r15) /* Check if current thread has pending RW locks held */
	jz 1f
	xorq %rbp, %rbp /* clear framepointer */
	mov %r15, %rdi /* Set RDI to current thread */
	CCALL(lck_rw_clear_promotions_x86) /* Clear promotions if needed */
1:

	cmpl $0, TH_TMP_ALLOC_CNT(%r15) /* Check if current thread has KHEAP_TEMP leaks */
	jz 1f
	xorq %rbp, %rbp /* clear framepointer */
	mov %r15, %rdi /* Set RDI to current thread */
	CCALL(kheap_temp_leak_panic)
1:

	movq TH_PCB_ISS(%r15), %r15 /* PCB stack */
	movl %gs:CPU_PENDING_AST,%eax
	testl %eax,%eax
	je EXT(return_to_user) /* branch if no AST */

L_return_from_trap_with_ast:
	testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */
	je 2f /* no, go handle the AST */
	cmpl $(SS_64), SS_FLAVOR(%r15) /* are we a 64-bit task? */
	je 1f
	/* no... 32-bit user mode */
	movl R32_EIP(%r15), %edi
	xorq %rbp, %rbp /* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl %eax, %eax
	je 2f /* not in the PFZ... go service AST */
	movl %eax, R32_EBX(%r15) /* let the PFZ know we've pended an AST */
	jmp EXT(return_to_user)
1:
	movq R64_RIP(%r15), %rdi
	xorq %rbp, %rbp /* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl %eax, %eax
	je 2f /* not in the PFZ... go service AST */
	movl %eax, R64_RBX(%r15) /* let the PFZ know we've pended an AST */
	jmp EXT(return_to_user)
2:

	xorq %rbp, %rbp /* clear framepointer */
	CCALL(ast_taken_user) /* handle all ASTs (enables interrupts, may return via continuation) */

	cli
	mov %rsp, %r15 /* AST changes stack, saved state */
	xorl %ecx, %ecx /* don't check if we're in the PFZ */
	jmp EXT(return_from_trap) /* and check again (rare) */

/*
 * Trap from kernel mode. No need to switch stacks.
 * Interrupts must be off here - we will restore them to the state at the
 * time of the trap as soon as it's safe to do so without recursing into
 * preemption handling.
 */
b0d623f7 1593trap_from_kernel:
f427ee49
A
1594
1595UNWIND_PROLOGUE
1596
39236c6e 1597 movq %r15, %rdi /* saved state addr */
f427ee49
A
1598
1599UNWIND_DIRECTIVES
1600
39236c6e 1601 pushq R64_RIP(%r15) /* Simulate a CALL from fault point */
b0d623f7
A
1602 pushq %rbp /* Extend framepointer chain */
1603 movq %rsp, %rbp
6d2010ae 1604 CCALLWITHSP(kernel_trap) /* to kernel trap routine */
b0d623f7
A
1605 popq %rbp
1606 addq $8, %rsp
39236c6e 1607 mov %rsp, %r15 /* DTrace slides stack/saved-state */
b0d623f7
A
1608 cli
1609
1610 movl %gs:CPU_PENDING_AST,%eax /* get pending asts */
1611 testl $(AST_URGENT),%eax /* any urgent preemption? */
1612 je ret_to_kernel /* no, nothing to do */
39236c6e 1613 cmpl $(T_PREEMPT),R64_TRAPNO(%r15)
b0d623f7 1614 je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */
39236c6e 1615 testl $(EFL_IF),R64_RFLAGS(%r15) /* interrupts disabled? */
b0d623f7
A
1616 je ret_to_kernel
1617 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
1618 jne ret_to_kernel
1619 movq %gs:CPU_KERNEL_STACK,%rax
1620 movq %rsp,%rcx
1621 xorq %rax,%rcx
1622 andq EXT(kernel_stack_mask)(%rip),%rcx
1623 testq %rcx,%rcx /* are we on the kernel stack? */
1624 jne ret_to_kernel /* no, skip it */
1625
5ba3f43e 1626 CCALL(ast_taken_kernel) /* take the AST */
39236c6e
A
1627
1628 mov %rsp, %r15 /* AST changes stack, saved state */
b0d623f7
A
1629 jmp ret_to_kernel
1630
f427ee49
A
1631UNWIND_EPILOGUE
1632
b0d623f7
A
/*
 * All interrupts on all tasks enter here with:
 *	r15	x86_saved_state_t
 *	rsp	kernel or interrupt stack
 *	esi	cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)

UNWIND_PROLOGUE

	/*
	 * test whether already on interrupt stack
	 */
	movq %gs:CPU_INT_STACK_TOP,%rcx
	cmpq %rsp,%rcx
	jb 1f
	leaq -INTSTACK_SIZE(%rcx),%rdx
	cmpq %rsp,%rdx
	jb int_from_intstack
1:
	xchgq %rcx,%rsp /* switch to interrupt stack */

	mov %cr0,%rax /* get cr0 */
	orl $(CR0_TS),%eax /* or in TS bit */
	mov %rax,%cr0 /* set cr0 */

	pushq %rcx /* save pointer to old stack */
	pushq %gs:CPU_INT_STATE /* save previous intr state */
	movq %r15,%gs:CPU_INT_STATE /* set intr state */

UNWIND_DIRECTIVES

	TIME_INT_ENTRY /* do timing */

	/* Check for active vtimers in the current task */
	mov %gs:CPU_ACTIVE_THREAD, %rcx
	mov TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl %gs:CPU_PREEMPTION_LEVEL
	incl %gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15) /* call generic interrupt routine */

UNWIND_EPILOGUE

.globl EXT(return_to_iret)
LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */

	decl %gs:CPU_INTERRUPT_LEVEL
	decl %gs:CPU_PREEMPTION_LEVEL

	TIME_INT_EXIT /* do timing */

	popq %gs:CPU_INT_STATE /* reset/clear intr state pointer */
	popq %rsp /* switch back to old stack */

	movq %gs:CPU_ACTIVE_THREAD,%rax
	movq TH_PCB_FPS(%rax),%rax /* get pcb's ifps */
	cmpq $0,%rax /* Is there a context */
	je 1f /* Branch if not */
	movl FP_VALID(%rax),%eax /* Load fp_valid */
	cmpl $0,%eax /* Check if valid */
	jne 1f /* Branch if valid */
	clts /* Clear TS */
	jmp 2f
1:
	mov %cr0,%rax /* get cr0 */
	orl $(CR0_TS),%eax /* or in TS bit */
	mov %rax,%cr0 /* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl R64_CS(%r15),%eax /* assume 64-bit state */
	cmpl $(SS_32),SS_FLAVOR(%r15) /* 32-bit? */
#if DEBUG_IDT64
	jne 5f
	movl R32_CS(%r15),%eax /* 32-bit user mode */
	jmp 3f
5:
	cmpl $(SS_64),SS_FLAVOR(%r15)
	je 3f
	POSTCODE2(0x6431)
	CCALL1(panic_idt64, %r15)
	hlt
#else
	je 4f
#endif
3:
	testb $3,%al /* user mode, */
	jnz ast_from_interrupt_user /* go handle potential ASTs */
	/*
	 * we only want to handle preemption requests if
	 * the interrupt fell in the kernel context
	 * and preemption isn't disabled
	 */
	movl %gs:CPU_PENDING_AST,%eax
	testl $(AST_URGENT),%eax /* any urgent requests? */
	je ret_to_kernel /* no, nothing to do */

	cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
	jne ret_to_kernel /* yes, skip it */

	/*
	 * Take an AST from kernel space. We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL(ast_taken_kernel)

	mov %rsp, %r15 /* AST changes stack, saved state */
	jmp ret_to_kernel
4:
	movl R32_CS(%r15),%eax /* 32-bit user mode */
	jmp 3b



/*
 * nested int - simple path, can't preempt etc on way out
 */
int_from_intstack:
	incl %gs:CPU_PREEMPTION_LEVEL
	incl %gs:CPU_INTERRUPT_LEVEL
	incl %gs:CPU_NESTED_ISTACK

	push %gs:CPU_INT_STATE
	mov %r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop %gs:CPU_INT_STATE

	decl %gs:CPU_INTERRUPT_LEVEL
	decl %gs:CPU_PREEMPTION_LEVEL
	decl %gs:CPU_NESTED_ISTACK

	jmp ret_to_kernel

/*
 * Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl %gs:CPU_PENDING_AST,%eax
	testl %eax,%eax /* pending ASTs? */
	je EXT(ret_to_user) /* no, nothing to do */

	TIME_TRAP_UENTRY

	movl $1, %ecx /* check if we're in the PFZ */
	jmp L_return_from_trap_with_ast /* return */


/* Syscall dispatch routines! */

/*
 *
 * 32bit Tasks
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	x86_saved_state32_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
	movl R32_EAX(%r15),%eax
	testl %eax,%eax
	js EXT(hndl_mach_scall) /* < 0 => mach */
	/* > 0 => unix */

Entry(hndl_unix_scall)

	TIME_TRAP_UENTRY

	movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
	movq TH_TASK(%rcx),%rbx /* point to current task */
	incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
	movq TH_TASK(%rcx),%rbx /* point to current task */
	incl TH_SYSCALLS_MACH(%rcx) /* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
	movq TH_TASK(%rcx),%rbx /* point to current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */

/*
 * 64bit Tasks
 * System call entries via syscall only:
 *
 *	r15	x86_saved_state64_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */
	movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling syscall */
	movq TH_TASK(%rcx),%rbx /* point to current task */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	/*
	 * We can be here either for a mach, unix, machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
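	/*
	 * Illustrative note (encoding per mach/i386/syscall_sw.h, included
	 * above): the class occupies the top byte of the 32-bit call number,
	 * i.e. (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT) | n for Mach trap n,
	 * which is what the masked comparisons below test for.
	 */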
	movl R64_RAX(%r15), %eax /* syscall number/class */
	movl %eax, %edx
	andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */
	cmpl $(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je EXT(hndl_mach_scall64)
	cmpl $(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je EXT(hndl_unix_scall64)
	cmpl $(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je EXT(hndl_mdep_scall64)
	cmpl $(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je EXT(hndl_diag_scall64)

	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
	/* no return */


Entry(hndl_unix_scall64)
	incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall64)
	incl TH_SYSCALLS_MACH(%rcx) /* increment call count */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */



Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15) // Call diagnostics
	test %eax, %eax // What kind of return is this?
	je 1f // - branch if bad (zero)
	jmp EXT(return_to_user) // Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
	/* no return */
/* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
Entry(hndl_machine_check)
	/* Adjust SP and savearea to their canonical, non-aliased addresses */
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	CCALL1(panic_double_fault64, %r15)
	hlt