/*
 * Copyright (c) 2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/asm.h>
#include <assym.s>
#include <debug.h>
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_	/* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>

#if DEBUG
#define DEBUG_IDT64 1
#endif

/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 *		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *   interrupt	- asynchronous events typically from external devices
 *   trap	- synchronous events due to thread execution
 *   syscall	- synchronous system call request
 *   fatal	- fatal traps
 */
/*
 * Indices of handlers for each exception type.
 */
#define	HNDL_ALLINTRS		0
#define	HNDL_ALLTRAPS		1
#define	HNDL_SYSENTER		2
#define	HNDL_SYSCALL		3
#define	HNDL_UNIX_SCALL		4
#define	HNDL_MACH_SCALL		5
#define	HNDL_MDEP_SCALL		6
#define	HNDL_DOUBLE_FAULT	7
#define	HNDL_MACHINE_CHECK	8

/* Begin double-mapped descriptor section */

.section __HIB, __desc
.globl EXT(idt64_hndl_table0)
EXT(idt64_hndl_table0):
	.quad	EXT(ks_dispatch)
	.quad	EXT(ks_64bit_return)
	.quad	0 /* Populated with CPU shadow displacement */
	.quad	EXT(ks_return)

EXT(idt64_hndl_table1):
	.quad	EXT(hndl_allintrs)
	.quad	EXT(hndl_alltraps)
	.quad	EXT(hndl_sysenter)
	.quad	EXT(hndl_syscall)
	.quad	EXT(hndl_unix_scall)
	.quad	EXT(hndl_mach_scall)
	.quad	EXT(hndl_mdep_scall)
	.quad	EXT(hndl_double_fault)
	.quad	EXT(hndl_machine_check)
.text


/* The wrapper for all non-special traps/interrupts */
/* Everything up to PUSH_FUNCTION is just to output
 * the interrupt number to the postcode display
 */
#if DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f)			 \
	push	%rax				;\
	POSTCODE2(0x6400+n)			;\
	pop	%rax				;\
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp	L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f)			 \
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp	L_dispatch
#endif
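
/*
 * At L_dispatch, the stub has built a full interrupt stack frame:
 * the hardware frame (SS/RSP/RFLAGS/CS/RIP), an error code (pushed by
 * the CPU, or a zero pushed by the stub), then the handler index (f)
 * and the vector number (n).  In terms of the ISF64_* offsets
 * documented further below:
 *	 0	ISF64_TRAPNO	vector number (n)
 *	 8	ISF64_TRAPFN	handler index (f) into idt64_hndl_table1
 *	16	ISF64_ERR	error code
 *	24	ISF64_RIP
 *	32	ISF64_CS
 *	40	ISF64_RFLAGS
 *	48	ISF64_RSP
 *	56	ISF64_SS
 */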

/* A trap that comes with an error code already on the stack */
#define TRAP_ERR(n, f)				 \
	Entry(f)				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A normal trap */
#define TRAP(n, f)				 \
	Entry(f)				;\
	pushq	$0				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define USER_TRAP TRAP

/* An interrupt */
#define INTERRUPT(n)				 \
	Entry(_intr_ ## n)			;\
	pushq	$0				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)

/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
#define TRAP_IST1(n, f)
#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)

/* Begin double-mapped text section */
.section __HIB, __text
/* Generate all the stubs */
#include "idt_table.h"

Entry(idt64_page_fault)
	pushq	$(HNDL_ALLTRAPS)
	push	$(T_PAGE_FAULT)
	jmp	L_dispatch

Entry(idt64_debug)
	push	$0			/* error code */
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_DEBUG)
	jmp	L_dispatch
/*
 * Legacy interrupt-gate system call handlers.
 * These are entered via a syscall interrupt. The system call number in %rax
 * is saved to the error-code slot in the stack frame. We then branch to the
 * common state-saving code.
 */

#ifndef UNIX_INT
#error NO UNIX INT!!!
#endif
Entry(idt64_unix_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_UNIX_SCALL)
	pushq	$(UNIX_INT)
	jmp	L_dispatch

Entry(idt64_mach_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MACH_SCALL)
	pushq	$(MACH_INT)
	jmp	L_dispatch

Entry(idt64_mdep_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MDEP_SCALL)
	pushq	$(MACHDEP_INT)
	jmp	L_dispatch

/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack and we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_gen_prot)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_GENERAL_PROTECTION)
	jmp	L_dispatch

Entry(idt64_stack_fault)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_segnp)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_SEGMENT_NOT_PRESENT)
	jmp	L_dispatch

/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(idt64_db_task_stk_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_mc)
	push	$(0)			/* Error */
	pushq	$(HNDL_MACHINE_CHECK)
	pushq	$(T_MACHINE_CHECK)
	jmp	L_dispatch

/*
 * NMI
 * This may or may not be fatal, but extreme care is required
 * because it may arrive while control is already in another trampoline.
 *
 * We get here on the IST2 stack, which is used for NMIs only.
 */
Entry(idt64_nmi)
	push	%rax			/* save RAX to ISF64_ERR */
	push	%rcx			/* save RCX to ISF64_TRAPFN */
	push	%rdx			/* save RDX to ISF64_TRAPNO */
	jmp	L_dispatch

Entry(idt64_double_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch


Entry(hi64_syscall)
Entry(idt64_syscall)
	swapgs
	/* Use RAX as a temporary by shifting its contents into R11[32:63].
	 * The system call number is defined to be a 32-bit quantity, as is
	 * RFLAGS.
	 */
	shlq	$32, %rax
	or	%rax, %r11
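	/*
	 * Note: SYSCALL has already placed the user RIP in %rcx and the
	 * user RFLAGS in %r11; only R11[0:31] is significant here, so
	 * R11[32:63] can park the 32-bit syscall number while RAX serves
	 * as scratch, until both are unpacked below.
	 */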
.globl EXT(dblsyscall_patch_point)
EXT(dblsyscall_patch_point):
//	movabsq	$0x12345678ABCDEFFFULL, %rax
	/* Generate offset to the double-mapped per-CPU data shadow
	 * into RAX
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax
	mov	%rsp, %gs:CPU_UBER_TMP(%rax)	/* save user stack */
	mov	%gs:CPU_ESTACK(%rax), %rsp	/* switch stack to per-cpu estack */
	sub	$(ISF64_SIZE), %rsp

	/*
	 * Synthesize an ISF frame on the exception stack
	 */
	movl	$(USER_DS), ISF64_SS(%rsp)
	mov	%rcx, ISF64_RIP(%rsp)		/* rip */

	mov	%gs:CPU_UBER_TMP(%rax), %rcx
	mov	%rcx, ISF64_RSP(%rsp)		/* user stack */

	mov	%r11, %rax
	shrq	$32, %rax			/* Restore RAX */
	mov	%r11d, %r11d			/* Clear r11[32:63] */

	mov	%r11, ISF64_RFLAGS(%rsp)	/* rflags */
	movl	$(SYSCALL_CS), ISF64_CS(%rsp)	/* cs - a pseudo-segment */
	mov	%rax, ISF64_ERR(%rsp)		/* err/rax - syscall code */
	movq	$(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
	movq	$(T_SYSCALL), ISF64_TRAPNO(%rsp)	/* trapno */
	swapgs
	jmp	L_dispatch			/* this can only be 64-bit */

Entry(hi64_sysenter)
Entry(idt64_sysenter)
	/* Synthesize an interrupt stack frame onto the
	 * exception stack.
	 */
	push	$(USER_DS)		/* ss */
	push	%rcx			/* uesp */
	pushf				/* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flag bit;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push	$0
	popf
	push	$(SYSENTER_CS)		/* cs */
L_sysenter_continue:
	push	%rdx			/* eip */
	push	%rax			/* err/eax - syscall code */
	pushq	$(HNDL_SYSENTER)
	pushq	$(T_SYSENTER)
	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp	L_dispatch

/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here with:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */

L_dispatch:
	pushq	%rax
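	/*
	 * The +8 below skips the %rax just pushed; the low two bits of the
	 * saved CS are the privilege level at the time of the trap, so a
	 * non-zero result means entry from user mode: swap GS and switch
	 * to the kernel's CR3 before dispatching.
	 */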
	testb	$3, 8+ISF64_CS(%rsp)
	jz	1f
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax

	mov	%gs:CPU_TASK_CR3(%rax), %rax
	mov	%rax, %cr3
#if DEBUG
	mov	%rax, %gs:CPU_ENTRY_CR3
#endif
1:
	/* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	/* Indirect branch to non-doublemapped trampolines */
	jmp	*(%rax)
/* User return: register restoration and address space switch sequence */
Entry(ks_64bit_return)
	mov	R64_R14(%r15), %r14
	mov	R64_R13(%r15), %r13
	mov	R64_R12(%r15), %r12
	mov	R64_R11(%r15), %r11
	mov	R64_R10(%r15), %r10
	mov	R64_R9(%r15), %r9
	mov	R64_R8(%r15), %r8
	mov	R64_RSI(%r15), %rsi
	mov	R64_RDI(%r15), %rdi
	mov	R64_RBP(%r15), %rbp
	mov	R64_RDX(%r15), %rdx
	mov	R64_RCX(%r15), %rcx
	mov	R64_RBX(%r15), %rbx
	mov	R64_RAX(%r15), %rax
	/* Switch to per-CPU exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Synthesize interrupt stack frame from PCB savearea to exception stack */
	push	R64_SS(%r15)
	push	R64_RSP(%r15)
	push	R64_RFLAGS(%r15)
	push	R64_CS(%r15)
	push	R64_RIP(%r15)

	mov	R64_R15(%r15), %r15
	cmpq	$(KERNEL64_CS), 8(%rsp)
	jz	1f
	/* Discover user cr3/ASID */
	push	%rax
	mov	%gs:CPU_UCR3, %rax
#if DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	pop	%rax
	swapgs
1:
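	/*
	 * Frames synthesized by the SYSCALL entry path carry SYSCALL_CS,
	 * a pseudo-segment, in the CS slot; those may exit via the faster
	 * SYSRET, which takes the return RIP from %rcx and RFLAGS from
	 * %r11 (hence the pops below). Anything else must IRET.
	 */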
	cmpl	$(SYSCALL_CS), 8(%rsp)	/* test for exit via SYSRET */
	je	L_sysret
EXT(ret64_iret):
	iretq				/* return from interrupt */
L_sysret:
	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 *	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	pop	%rsp
	sysretq				/* return from system call */
/* End of double-mapped TEXT */
.text

Entry(ks_dispatch)
	popq	%rax
	cmpl	$(KERNEL64_CS), ISF64_CS(%rsp)
	je	EXT(ks_dispatch_kernel)

	mov	%rax, %gs:CPU_UBER_TMP
	mov	%gs:CPU_UBER_ISF, %rax
	add	$(ISF64_SIZE), %rax

	xchg	%rsp, %rax
/* Memory-to-memory moves (ain't x86 wonderful):
 * Transfer the exception frame from the per-CPU exception stack to the
 * 'PCB' stack programmed at cswitch.
 */
	push	ISF64_SS(%rax)
	push	ISF64_RSP(%rax)
	push	ISF64_RFLAGS(%rax)
	push	ISF64_CS(%rax)
	push	ISF64_RIP(%rax)
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push	ISF64_TRAPNO(%rax)
	mov	%gs:CPU_UBER_TMP, %rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_return)
	jmp	.

Entry(ks_dispatch_user)
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je	L_dispatch_U32		/* 32-bit user task */

L_dispatch_U64:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	jmp	L_dispatch_64bit

Entry(ks_dispatch_kernel)
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl	$(SS_64), SS_FLAVOR(%r15)

	/*
	 * Save segment regs - for completeness since they're not used.
	 */
	movl	%fs, R64_FS(%r15)
	movl	%gs, R64_GS(%r15)

	/* Save general-purpose registers */
	mov	%rax, R64_RAX(%r15)
	mov	%rbx, R64_RBX(%r15)
	mov	%rcx, R64_RCX(%r15)
	mov	%rdx, R64_RDX(%r15)
	mov	%rbp, R64_RBP(%r15)
	mov	%rdi, R64_RDI(%r15)
	mov	%rsi, R64_RSI(%r15)
	mov	%r8, R64_R8(%r15)
	mov	%r9, R64_R9(%r15)
	mov	%r10, R64_R10(%r15)
	mov	%r11, R64_R11(%r15)
	mov	%r12, R64_R12(%r15)
	mov	%r13, R64_R13(%r15)
	mov	%r14, R64_R14(%r15)

	/* cr2 is significant only for page-faults */
	mov	%cr2, %rax
	mov	%rax, R64_CR2(%r15)

	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
	mov	R64_CS(%r15), %esi	/* %esi := cs for later */

	jmp	L_common_dispatch

L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	movq	$(HNDL_ALLTRAPS), ISF64_TRAPFN(%rsp)
	movq	$(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp)
	jmp	L_dispatch_U64

Entry(ks_32bit_entry_check)
	/*
	 * Check we're not a confused 64-bit user.
	 */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	jne	L_64bit_entry_reject
	/* fall through to 32-bit handler: */

L_dispatch_U32: /* 32-bit user task */
	subq	$(ISS64_OFFSET), %rsp
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	movl	$(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	movl	%ds, R32_DS(%r15)
	movl	%es, R32_ES(%r15)
	movl	%fs, R32_FS(%r15)
	movl	%gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov	%eax, R32_EAX(%r15)
	mov	%ebx, R32_EBX(%r15)
	mov	%ecx, R32_ECX(%r15)
	mov	%edx, R32_EDX(%r15)
	mov	%ebp, R32_EBP(%r15)
	mov	%esi, R32_ESI(%r15)
	mov	%edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	mov	%cr2, %rax
	mov	%eax, R32_CR2(%r15)

	/*
	 * Copy registers already saved in the machine state
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov	R64_RIP(%r15), %eax
	mov	%eax, R32_EIP(%r15)
	mov	R64_RFLAGS(%r15), %eax
	mov	%eax, R32_EFLAGS(%r15)
	mov	R64_RSP(%r15), %eax
	mov	%eax, R32_UESP(%r15)
	mov	R64_SS(%r15), %eax
	mov	%eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov	R64_CS(%r15), %esi	/* %esi := %cs for later */
	mov	%esi, R32_CS(%r15)
	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	%ebx, R32_TRAPNO(%r15)
	mov	R64_ERR(%r15), %eax
	mov	%eax, R32_ERR(%r15)
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */

L_common_dispatch:
	cld			/* Ensure the direction flag is clear in the kernel */
	cmpl	$0, EXT(pmap_smap_enabled)(%rip)
	je	1f
	clac			/* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * On entering the kernel, we typically don't switch CR3
	 * because the kernel shares the user's address space.
	 * But we mark the kernel's cr3 as "active" for TLB coherency evaluation.
	 * If, however, the CPU's invalid-TLB flag is set, we have to invalidate the TLB
	 * since the kernel pagetables were changed while we were in userspace.
	 *
	 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
	 * we switch to the kernel's address space on entry. Also,
	 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov	%gs:CPU_KERNEL_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	test	$3, %esi		/* user/kernel? */
	jz	2f			/* skip cr3 reload from kernel */
	xor	%rbp, %rbp
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	11f
	cmpl	$0, EXT(no_shared_cr3)(%rip)
	je	2f
11:
	xor	%eax, %eax
	movw	%gs:CPU_KERNEL_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3		/* load kernel cr3 */
	jmp	4f			/* and skip tlb flush test */
2:
	mov	%gs:CPU_ACTIVE_CR3+4, %rcx
	shr	$32, %rcx
	testl	%ecx, %ecx
	jz	4f
	movl	$0, %gs:CPU_TLB_INVALID
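	/*
	 * Toggling CR4.PGE off and back on below flushes the entire TLB,
	 * including entries marked global, which a simple CR3 reload
	 * would leave intact.
	 */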
	mov	%cr4, %rcx	/* RMWW CR4, for lack of an alternative */
	and	$(~CR4_PGE), %rcx
	mov	%rcx, %cr4
	or	$(CR4_PGE), %rcx
	mov	%rcx, %cr4
4:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
	testq	%rcx, %rcx
	je	5f
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap */
	cmpq	$0, TH_PCB_IDS(%rcx)	/* Is there a debug register state? */
	je	5f
	xor	%ecx, %ecx		/* If so, reset DR7 (the control) */
	mov	%rcx, %dr7
5:
	incl	%gs:hwIntCnt(,%ebx,4)	// Bump the trap/intr count
	/* Dispatch the designated handler */
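	/* %rdx still holds the HNDL_* index pushed by the entry stub, so
	 * this is an indexed jump through idt64_hndl_table1, scaled by
	 * 8 bytes per entry.
	 */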
	leaq	EXT(idt64_hndl_table1)(%rip), %rax
	jmp	*(%rax, %rdx, 8)

/*
 * Control is passed here to return to user.
 */
Entry(return_to_user)
	TIME_TRAP_UEXIT

Entry(ret_to_user)
// XXX Would be nice to tidy up this debug register restore sequence...
	mov	%gs:CPU_ACTIVE_THREAD, %rdx
	movq	TH_PCB_IDS(%rdx),%rax	/* Obtain this thread's debug state */

	test	%rax, %rax		/* Is there a debug register context? */
	je	2f			/* branch if not */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP	/* Are we a 32-bit task? */
	jne	1f
	movl	DS_DR0(%rax), %ecx	/* If so, load the 32 bit DRs */
	movq	%rcx, %dr0
	movl	DS_DR1(%rax), %ecx
	movq	%rcx, %dr1
	movl	DS_DR2(%rax), %ecx
	movq	%rcx, %dr2
	movl	DS_DR3(%rax), %ecx
	movq	%rcx, %dr3
	movl	DS_DR7(%rax), %ecx
	movq	%rcx, %gs:CPU_DR7
	jmp	2f
1:
	mov	DS64_DR0(%rax), %rcx	/* Load the full width DRs */
	mov	%rcx, %dr0
	mov	DS64_DR1(%rax), %rcx
	mov	%rcx, %dr1
	mov	DS64_DR2(%rax), %rcx
	mov	%rcx, %dr2
	mov	DS64_DR3(%rax), %rcx
	mov	%rcx, %dr3
	mov	DS64_DR7(%rax), %rcx
	mov	%rcx, %gs:CPU_DR7
2:
	/*
	 * On exiting the kernel there's typically no need to switch cr3 since we're
	 * already running in the user's address space, which includes the
	 * kernel. We now mark the task's cr3 as active, for TLB coherency.
	 * If the target address space has a pagezero mapping present, or
	 * if no_shared_cr3 is set, we do need to switch cr3 at this point.
	 */
	mov	%gs:CPU_TASK_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	L_cr3_switch_island
	movl	EXT(no_shared_cr3)(%rip), %eax
	test	%eax, %eax		/* -no_shared_cr3 */
	jnz	L_cr3_switch_island

L_cr3_switch_return:
	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register? */
	cmp	$0, %rax
	je	4f
	mov	%rax, %dr7		/* Set DR7 */
	movq	$0, %gs:CPU_DR7
4:
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	L_64bit_return

L_32bit_return:
#if DEBUG_IDT64
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit state? */
	je	1f
	cli
	POSTCODE2(0x6432)
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */

	/*
	 * Restore registers into the machine state for iret.
	 * Here on fault stack with the PCB address in R15.
	 */
	movl	R32_EIP(%r15), %eax
	movl	%eax, R64_RIP(%r15)
	movl	R32_EFLAGS(%r15), %eax
	movl	%eax, R64_RFLAGS(%r15)
	movl	R32_CS(%r15), %eax
	movl	%eax, R64_CS(%r15)
	movl	R32_UESP(%r15), %eax
	movl	%eax, R64_RSP(%r15)
	movl	R32_SS(%r15), %eax
	movl	%eax, R64_SS(%r15)

	/* Validate DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER_DS and 0 */
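	/* LAR sets ZF when the selector is valid and accessible at the
	 * current privilege level; on failure (ZF clear) the saved selector
	 * is replaced with a known-good value rather than faulting during
	 * the restoration below.
	 */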
	cmpl	$(USER_DS), R32_DS(%r15)
	jz	22f
	cmpl	$0, R32_DS(%r15)
	jz	22f
	larw	R32_DS(%r15), %ax
	jz	22f
	movl	$(USER_DS), R32_DS(%r15)
22:
	cmpl	$(USER_DS), R32_ES(%r15)
	jz	33f
	cmpl	$0, R32_ES(%r15)
	jz	33f
	larw	R32_ES(%r15), %ax
	jz	33f
	movl	$(USER_DS), R32_ES(%r15)
33:
	cmpl	$(USER_DS), R32_FS(%r15)
	jz	44f
	cmpl	$0, R32_FS(%r15)
	jz	44f
	larw	R32_FS(%r15), %ax
	jz	44f
	movl	$(USER_DS), R32_FS(%r15)
44:
	cmpl	$(USER_CTHREAD), R32_GS(%r15)
	jz	55f
	cmpl	$0, R32_GS(%r15)
	jz	55f
	larw	R32_GS(%r15), %ax
	jz	55f
	movl	$(USER_CTHREAD), R32_GS(%r15)
55:

	/*
	 * Restore general 32-bit registers
	 */
	movl	R32_EAX(%r15), %eax
	movl	R32_EBX(%r15), %ebx
	movl	R32_ECX(%r15), %ecx
	movl	R32_EDX(%r15), %edx
	movl	R32_EBP(%r15), %ebp
	movl	R32_ESI(%r15), %esi
	movl	R32_EDI(%r15), %edi

	/*
	 * Restore segment registers. A segment exception taken here will
	 * push state on the IST1 stack and will not affect the "PCB stack".
	 */
	mov	%r15, %rsp		/* Set the PCB as the stack */
	swapgs

	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14
	xor	%r15, %r15

EXT(ret32_set_ds):
	movw	R32_DS(%rsp), %ds
EXT(ret32_set_es):
	movw	R32_ES(%rsp), %es
EXT(ret32_set_fs):
	movw	R32_FS(%rsp), %fs
EXT(ret32_set_gs):
	movw	R32_GS(%rsp), %gs

	/* pop compat frame + trapno, trapfn and error */
	add	$(ISS64_OFFSET)+8+8+8, %rsp
	cmpl	$(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
					/* test for fast entry/exit */
	je	L_fast_exit
EXT(ret32_iret):
	iretq				/* return from interrupt */

L_fast_exit:
	pop	%rdx			/* user return eip */
	pop	%rcx			/* pop and toss cs */
	andl	$(~EFL_IF), (%rsp)	/* clear interrupt-enable; sti below */
	popf				/* flags - carry denotes failure */
	pop	%rcx			/* user return esp */
	sti				/* interrupts enabled after sysexit */
	sysexitl			/* 32-bit sysexit */

L_cr3_switch_island:
	xor	%eax, %eax
	movw	%gs:CPU_ACTIVE_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3
	jmp	L_cr3_switch_return

ret_to_kernel:
#if DEBUG_IDT64
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	1f
	cli
	POSTCODE2(0x6464)
	CCALL1(panic_idt64, %r15)
	hlt
1:
	cmpl	$(KERNEL64_CS), R64_CS(%r15)
	je	2f
	CCALL1(panic_idt64, %r15)
	hlt
2:
#endif

L_64bit_return:
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*8(%rax)

Entry(ks_idt64_debug_kernel)
	/*
	 * trap came from kernel mode
	 */

	push	%rax			/* save %rax temporarily */
	lea	EXT(idt64_sysenter)(%rip), %rax
	cmp	%rax, ISF64_RIP+8(%rsp)
	pop	%rax
	jne	EXT(ks_dispatch_kernel)
	/*
	 * Interrupt stack frame has been pushed on the temporary stack.
	 * We have to switch to the PCB stack and patch up the saved state.
	 */
	mov	%rcx, ISF64_ERR(%rsp)	/* save %rcx in error slot */
	mov	ISF64_SS+8(%rsp), %rcx	/* top of temp stack -> pcb stack */
	xchg	%rcx,%rsp		/* switch to pcb stack */
	push	$(USER_DS)		/* ss */
	push	ISF64_ERR(%rcx)		/* saved %rcx into rsp slot */
	push	ISF64_RFLAGS(%rcx)	/* rflags */
	push	$(SYSENTER_TF_CS)	/* cs - not SYSENTER_CS for iret path */
	mov	ISF64_ERR(%rcx),%rcx	/* restore %rcx */
	jmp	L_sysenter_continue	/* continue sysenter entry */

Entry(ks_trap_check_kernel_exit)
	testb	$3,ISF64_CS(%rsp)
	jz	L_kernel_gpf

	/* Here for fault from user-space. Copy interrupt state to PCB. */
	swapgs
	push	%rax
	mov	%rcx, %gs:CPU_UBER_TMP		/* save user RCX */
	mov	%gs:CPU_UBER_ISF, %rcx		/* PCB stack addr */
	mov	ISF64_SS+8(%rsp), %rax
	mov	%rax, ISF64_SS(%rcx)
	mov	ISF64_RSP+8(%rsp), %rax
	mov	%rax, ISF64_RSP(%rcx)
	mov	ISF64_RFLAGS+8(%rsp), %rax
	mov	%rax, ISF64_RFLAGS(%rcx)
	mov	ISF64_CS+8(%rsp), %rax
	mov	%rax, ISF64_CS(%rcx)
	mov	ISF64_RIP+8(%rsp), %rax
	mov	%rax, ISF64_RIP(%rcx)
	mov	ISF64_ERR+8(%rsp), %rax
	mov	%rax, ISF64_ERR(%rcx)
	mov	ISF64_TRAPFN+8(%rsp), %rax
	mov	%rax, ISF64_TRAPFN(%rcx)
	mov	ISF64_TRAPNO+8(%rsp), %rax
	mov	%rax, ISF64_TRAPNO(%rcx)
	pop	%rax
	mov	%gs:CPU_UBER_TMP, %rsp		/* user RCX into RSP */
	xchg	%rcx, %rsp			/* to PCB stack with user RCX */
	jmp	EXT(ks_dispatch_user)

L_kernel_gpf:
	/* Here for GPF from kernel_space. Check for recoverable cases. */
	push	%rax
	leaq	EXT(ret32_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret64_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret32_set_ds)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_es)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_fs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_gs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	jmp	EXT(ks_kernel_trap)
	/* Fall through */

Entry(ks_kernel_trap)
	/*
	 * Here after taking an unexpected trap from kernel mode - perhaps
	 * while running in the trampolines hereabouts.
	 * Note: %rax has been pushed on stack.
	 * Make sure we're not on the PCB stack; if so, move to the kernel stack.
	 * This is likely a fatal condition.
	 * But first, ensure we have the kernel gs base active...
	 */
	push	%rcx
	push	%rdx
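	/*
	 * The kernel gsbase is a canonical kernel-space (sign-extended)
	 * address, so after rdmsr the MSB of %edx tells us whether the
	 * kernel GS is already active; if not, swapgs.
	 */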
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr				/* read kernel gsbase */
	test	$0x80000000, %edx	/* test MSB of address */
	jne	1f
	swapgs				/* so swap */
1:
	pop	%rdx
	pop	%rcx

	movq	%gs:CPU_UBER_ISF, %rax	/* PCB stack addr */
	subq	%rsp, %rax
	cmpq	$(PAGE_SIZE), %rax	/* current stack in PCB? */
	jb	2f			/*  - yes, deal with it */
	pop	%rax			/*  - no, restore %rax */
	jmp	EXT(ks_dispatch_kernel)
2:
	/*
	 * Here if %rsp is in the PCB
	 * Copy the interrupt stack frame from PCB stack to kernel stack
	 */
	movq	%gs:CPU_KERNEL_STACK, %rax
	xchgq	%rax, %rsp
	pushq	8+ISF64_SS(%rax)
	pushq	8+ISF64_RSP(%rax)
	pushq	8+ISF64_RFLAGS(%rax)
	pushq	8+ISF64_CS(%rax)
	pushq	8+ISF64_RIP(%rax)
	pushq	8+ISF64_ERR(%rax)
	pushq	8+ISF64_TRAPFN(%rax)
	pushq	8+ISF64_TRAPNO(%rax)
	movq	(%rax), %rax
	jmp	EXT(ks_dispatch_kernel)


/*
 * GP/NP fault on IRET: CS or SS is in error.
 * User GSBASE is active.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *   0	ISF64_TRAPNO:	trap code (NP or GP)
 *   8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	ISF64_RIP:	kernel RIP
 *  32	ISF64_CS:	kernel CS
 *  40	ISF64_RFLAGS:	kernel RFLAGS
 *  48	ISF64_RSP:	kernel RSP
 *  56	ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *   0	user RIP
 *   8	user CS
 *  16	user RFLAGS
 *  24	user RSP
 *  32	user SS
 *
 * We need to move the kernel's TRAPNO, TRAPFN and ERR to the PCB and handle
 * as a user fault with:
 *   0	ISF64_TRAPNO:	trap code (NP or GP)
 *   8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	user RIP
 *  32	user CS
 *  40	user RFLAGS
 *  48	user RSP
 *  56	user SS
 */
L_fault_iret:
	pop	%rax			/* recover saved %rax */
	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don't need saved rip) */
	mov	ISF64_RSP(%rsp), %rax
	xchg	%rax, %rsp		/* switch to PCB stack */
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push	ISF64_TRAPNO(%rax)
	mov	ISF64_RIP(%rax), %rax	/* restore rax */
	/* now treat as fault from user */
	jmp	L_dispatch

/*
 * Fault restoring a segment register. All of the saved state is still
 * on the stack untouched since we haven't yet moved the stack pointer.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *   0	ISF64_TRAPNO:	trap code (NP or GP)
 *   8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	ISF64_RIP:	kernel RIP
 *  32	ISF64_CS:	kernel CS
 *  40	ISF64_RFLAGS:	kernel RFLAGS
 *  48	ISF64_RSP:	kernel RSP
 *  56	ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *   0	user trap code
 *   8	user trap function
 *  16	user err
 *  24	user RIP
 *  32	user CS
 *  40	user RFLAGS
 *  48	user RSP
 *  56	user SS
 */
L_32bit_fault_set_seg:
	swapgs
	pop	%rax			/* toss saved %rax from stack */
	mov	ISF64_TRAPNO(%rsp), %rax
	mov	ISF64_TRAPFN(%rsp), %rcx
	mov	ISF64_ERR(%rsp), %rdx
	mov	ISF64_RSP(%rsp), %rsp	/* reset stack to saved state */
	mov	%rax,R64_TRAPNO(%rsp)
	mov	%rcx,R64_TRAPFN(%rsp)
	mov	%rdx,R64_ERR(%rsp)
	/* now treat as fault from user */
	/* except that all the state is */
	/* already saved - we just have to */
	/* move the trapno and error into */
	/* the compatibility frame */
	jmp	L_dispatch_U32_after_fault


Entry(ks_idt64_nmi_kernel)
	/* From user-space: copy interrupt state to user PCB */
	swapgs
	mov	%gs:CPU_UBER_ISF, %rcx	/* PCB stack addr */
	add	$(ISF64_SIZE), %rcx	/* adjust to base of ISF */
	swapgs				/* swap back for L_dispatch */
	jmp	4f			/* Copy state to PCB */

1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS is set.
	 * Set the kernel and ensure that we'll swap back correctly at IRET.
	 */
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr				/* read kernel gsbase */
	test	$0x80000000, %edx	/* test MSB of address */
	jne	2f
	swapgs				/* so swap */
	movl	$1, ISF64_CS+4(%rsp)	/* and set flag in CS slot */
2:
	/*
	 * Determine whether we're on the kernel or interrupt stack
	 * when the NMI hit.
	 */
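	/*
	 * The xor/mask sequence below relies on these stacks being aligned
	 * to their size: (candidate ^ stack_base) & kernel_stack_mask is
	 * zero exactly when both addresses lie within the same stack.
	 */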
	mov	ISF64_RSP(%rsp), %rcx
	mov	%gs:CPU_KERNEL_STACK, %rax
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax		/* are we on the kernel stack? */
	je	3f			/* yes */

	mov	%gs:CPU_INT_STACK_TOP, %rax
	dec	%rax			/* intr stack top is byte above max */
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax		/* are we on the interrupt stack? */
	je	3f			/* yes */

	mov	%gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and	$0xFFFFFFFFFFFFFFF0, %rcx

4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 *	ISF64_ERR(RSP)		saved RAX
	 *	ISF64_TRAPFN(RSP)	saved RCX
	 *	ISF64_TRAPNO(RSP)	saved RDX
	 */
	xchg	%rsp, %rcx		/* set for pushes */
	push	ISF64_SS(%rcx)
	push	ISF64_RSP(%rcx)
	push	ISF64_RFLAGS(%rcx)
	push	ISF64_CS(%rcx)
	push	ISF64_RIP(%rcx)
	push	$(0)			/* error code 0 */
	push	$(HNDL_ALLINTRS)	/* trapfn allintrs */
	push	$(T_NMI)		/* trapno T_NMI */
	mov	ISF64_ERR(%rcx), %rax
	mov	ISF64_TRAPNO(%rcx), %rdx
	mov	ISF64_TRAPFN(%rcx), %rcx
	jmp	L_dispatch


/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 *
 * The rest of the state is set up as:
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov	%esi, %eax
	testb	$3, %al
	jz	trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap/exception */
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15)		/* call user trap routine */
	/* user_trap() unmasks interrupts */
	cli				/* hold off intrs - critical section */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */


Entry(return_from_trap)
	movq	%gs:CPU_ACTIVE_THREAD,%r15	/* Get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%r15)	/* Reset IO tier override to -1 before returning to userspace */
	cmpl	$0, TH_RWLOCK_COUNT(%r15)	/* Check if current thread has pending RW locks held */
	jz	1f
	xorq	%rbp, %rbp		/* clear framepointer */
	mov	%r15, %rdi		/* Set RDI to current thread */
	CCALL(lck_rw_clear_promotions_x86)	/* Clear promotions if needed */
1:
	movq	TH_PCB_ISS(%r15), %r15	/* PCB stack */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax
	je	EXT(return_to_user)	/* branch if no AST */

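/*
 * The PFZ is the commpage's preemption-free zone: if the user was
 * executing inside it, the AST is not serviced immediately; instead
 * it is flagged to the PFZ code (via EBX/RBX below) and delivery is
 * deferred until the zone is exited.
 */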
L_return_from_trap_with_ast:
	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
	je	2f			/* no, go handle the AST */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* are we a 64-bit task? */
	je	1f
	/* no... 32-bit user mode */
	movl	R32_EIP(%r15), %edi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R32_EBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
1:
	movq	R64_RIP(%r15), %rdi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R64_RBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
2:
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(ast_taken_user)		/* handle all ASTs (enables interrupts, may return via continuation) */

	cli
	mov	%rsp, %r15		/* AST changes stack, saved state */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
	jmp	EXT(return_from_trap)	/* and check again (rare) */

/*
 * Trap from kernel mode. No need to switch stacks.
 * Interrupts must be off here - we will set them to the state at the time
 * of the trap as soon as it's safe for us to do so, and not recurse doing
 * preemption.
 */
trap_from_kernel:
	movq	%r15, %rdi		/* saved state addr */
	pushq	R64_RIP(%r15)		/* Simulate a CALL from fault point */
	pushq	%rbp			/* Extend framepointer chain */
	movq	%rsp, %rbp
	CCALLWITHSP(kernel_trap)	/* to kernel trap routine */
	popq	%rbp
	addq	$8, %rsp
	mov	%rsp, %r15		/* DTrace slides stack/saved-state */
	cli

	movl	%gs:CPU_PENDING_AST,%eax	/* get pending asts */
	testl	$(AST_URGENT),%eax	/* any urgent preemption? */
	je	ret_to_kernel		/* no, nothing to do */
	cmpl	$(T_PREEMPT),R64_TRAPNO(%r15)
	je	ret_to_kernel		/* T_PREEMPT handled in kernel_trap() */
	testl	$(EFL_IF),R64_RFLAGS(%r15)	/* interrupts disabled? */
	je	ret_to_kernel
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel
	movq	%gs:CPU_KERNEL_STACK,%rax
	movq	%rsp,%rcx
	xorq	%rax,%rcx
	andq	EXT(kernel_stack_mask)(%rip),%rcx
	testq	%rcx,%rcx		/* are we on the kernel stack? */
	jne	ret_to_kernel		/* no, skip it */

	CCALL(ast_taken_kernel)		/* take the AST */

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel


/*
 * All interrupts on all tasks enter here with:
 *	r15	x86_saved_state_t
 *	rsp	kernel or interrupt stack
 *	esi	cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)
	/*
	 * test whether already on interrupt stack
	 */
	movq	%gs:CPU_INT_STACK_TOP,%rcx
	cmpq	%rsp,%rcx
	jb	1f
	leaq	-INTSTACK_SIZE(%rcx),%rdx
	cmpq	%rsp,%rdx
	jb	int_from_intstack
1:
	xchgq	%rcx,%rsp		/* switch to interrupt stack */

	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
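	/*
	 * With CR0.TS set, any floating-point use in interrupt context
	 * raises device-not-available (#NM) rather than silently touching
	 * user FP state; TS is reconciled against the thread's fp_valid
	 * on the way out, at return_to_iret.
	 */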

	pushq	%rcx			/* save pointer to old stack */
	pushq	%gs:CPU_INT_STATE	/* save previous intr state */
	movq	%r15,%gs:CPU_INT_STATE	/* set intr state */

	TIME_INT_ENTRY			/* do timing */

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15)		/* call generic interrupt routine */

.globl EXT(return_to_iret)
LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL

	TIME_INT_EXIT			/* do timing */

	popq	%gs:CPU_INT_STATE	/* reset/clear intr state pointer */
	popq	%rsp			/* switch back to old stack */

	movq	%gs:CPU_ACTIVE_THREAD,%rax
	movq	TH_PCB_FPS(%rax),%rax	/* get pcb's ifps */
	cmpq	$0,%rax			/* Is there a context */
	je	1f			/* Branch if not */
	movl	FP_VALID(%rax),%eax	/* Load fp_valid */
	cmpl	$0,%eax			/* Check if valid */
	jne	1f			/* Branch if valid */
	clts				/* Clear TS */
	jmp	2f
1:
	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl	R32_CS(%r15),%eax	/* assume 32-bit state */
	cmpl	$(SS_64),SS_FLAVOR(%r15)	/* 64-bit? */
#if DEBUG_IDT64
	jne	4f
	movl	R64_CS(%r15),%eax	/* 64-bit user mode */
	jmp	3f
4:
	cmpl	$(SS_32),SS_FLAVOR(%r15)
	je	3f
	POSTCODE2(0x6431)
	CCALL1(panic_idt64, %r15)
	hlt
#else
	jne	3f
	movl	R64_CS(%r15),%eax	/* 64-bit user mode */
#endif
3:
	testb	$3,%al			/* user mode, */
	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
	/*
	 * we only want to handle preemption requests if
	 * the interrupt fell in the kernel context
	 * and preemption isn't disabled
	 */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	$(AST_URGENT),%eax	/* any urgent requests? */
	je	ret_to_kernel		/* no, nothing to do */

	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel		/* yes, skip it */

	/*
	 * Take an AST from kernel space. We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL(ast_taken_kernel)

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel


/*
 * nested int - simple path, can't preempt etc on way out
 */
int_from_intstack:
	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL
	incl	%gs:CPU_NESTED_ISTACK

	push	%gs:CPU_INT_STATE
	mov	%r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop	%gs:CPU_INT_STATE

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL
	decl	%gs:CPU_NESTED_ISTACK

	jmp	ret_to_kernel

/*
 * Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax		/* pending ASTs? */
	je	EXT(ret_to_user)	/* no, nothing to do */

	TIME_TRAP_UENTRY

	movl	$1, %ecx		/* check if we're in the PFZ */
	jmp	L_return_from_trap_with_ast	/* return */


/* Syscall dispatch routines! */

/*
 *
 * 32-bit Tasks
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	x86_saved_state32_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
	movl	R32_EAX(%r15),%eax
	testl	%eax,%eax
	js	EXT(hndl_mach_scall)	/* < 0 => mach */
					/* > 0 => unix */

Entry(hndl_unix_scall)

	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movq	TH_TASK(%rcx),%rbx	/* point to current task */
	incl	TH_SYSCALLS_UNIX(%rcx)	/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movq	TH_TASK(%rcx),%rbx	/* point to current task */
	incl	TH_SYSCALLS_MACH(%rcx)	/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movq	TH_TASK(%rcx),%rbx	/* point to current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */

/*
 * 64-bit Tasks
 * System call entries via syscall only:
 *
 *	r15	x86_saved_state64_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling syscall */
	movq	TH_TASK(%rcx),%rbx	/* point to current task */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	/*
	 * We can be here either for a mach, unix, machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
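	/*
	 * The syscall class occupies the high-order bits of the 32-bit
	 * syscall number (SYSCALL_CLASS_MASK/SYSCALL_CLASS_SHIFT from the
	 * included mach/i386/syscall_sw.h), so masking %edx and comparing
	 * against each class<<shift selects the handler.
	 */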
	movl	R64_RAX(%r15), %eax	/* syscall number/class */
	movl	%eax, %edx
	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mach_scall64)
	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_unix_scall64)
	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mdep_scall64)
	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_diag_scall64)

	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
	/* no return */


Entry(hndl_unix_scall64)
	incl	TH_SYSCALLS_UNIX(%rcx)	/* increment call count */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall64)
	incl	TH_SYSCALLS_MACH(%rcx)	/* increment call count */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15)	// Call diagnostics
	test	%eax, %eax		// What kind of return is this?
	je	1f			// - branch if bad (zero)
	jmp	EXT(return_to_user)	// Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
	/* no return */
/* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
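/*
 * These fatal handlers may be entered while still running on the
 * double-mapped alias of the trampoline text and stacks; subtracting
 * dblmap_dist rebases both the stack pointer and the savearea pointer
 * to their canonical kernel addresses before calling into C.
 */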
Entry(hndl_machine_check)
	/* Adjust SP and savearea to their canonical, non-aliased addresses */
	subq	EXT(dblmap_dist)(%rip), %rsp
	subq	EXT(dblmap_dist)(%rip), %r15
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	subq	EXT(dblmap_dist)(%rip), %rsp
	subq	EXT(dblmap_dist)(%rip), %r15
	CCALL1(panic_double_fault64, %r15)
	hlt