/*
 * Copyright (c) 2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/asm.h>
#include <assym.s>
#include <debug.h>
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_	/* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>

#if DEBUG
#define DEBUG_IDT64 1
#endif

/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 *		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *	interrupt	- asynchronous events typically from external devices
 *	trap		- synchronous events due to thread execution
 *	syscall		- synchronous system call request
 *	fatal		- fatal traps
 */
/*
 * Indices of handlers for each exception type.
 */
#define	HNDL_ALLINTRS		0
#define	HNDL_ALLTRAPS		1
#define	HNDL_SYSENTER		2
#define	HNDL_SYSCALL		3
#define	HNDL_UNIX_SCALL		4
#define	HNDL_MACH_SCALL		5
#define	HNDL_MDEP_SCALL		6
#define	HNDL_DOUBLE_FAULT	7
#define	HNDL_MACHINE_CHECK	8

/* Begin double-mapped descriptor section */

.section __HIB, __desc
.globl EXT(idt64_hndl_table0)
EXT(idt64_hndl_table0):
	.quad EXT(ks_dispatch)
	.quad EXT(ks_64bit_return)
	.quad 0 /* Populated with CPU shadow displacement */
	.quad EXT(ks_return)

EXT(idt64_hndl_table1):
	.quad EXT(hndl_allintrs)
	.quad EXT(hndl_alltraps)
	.quad EXT(hndl_sysenter)
	.quad EXT(hndl_syscall)
	.quad EXT(hndl_unix_scall)
	.quad EXT(hndl_mach_scall)
	.quad EXT(hndl_mdep_scall)
	.quad EXT(hndl_double_fault)
	.quad EXT(hndl_machine_check)
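/*
 * Note: the entry order of idt64_hndl_table1 must match the HNDL_*
 * indices defined above; L_common_dispatch reaches a handler with an
 * indexed indirect jump ("jmp *(%rax, %rdx, 8)", %rdx holding the
 * HNDL_* index pushed by the stub).
 */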
.text

/* The wrapper for all non-special traps/interrupts */
/* Everything up to the push of the handler index is just to output
 * the interrupt number to the postcode display
 */
#if DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f)	 \
	push	%rax		;\
	POSTCODE2(0x6400+n)	;\
	pop	%rax		;\
	pushq	$(f)		;\
	pushq	$(n)		;\
	jmp	L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f)	 \
	pushq	$(f)		;\
	pushq	$(n)		;\
	jmp	L_dispatch
#endif

/* A trap that comes with an error code already on the stack */
#define TRAP_ERR(n, f)		 \
	Entry(f)		;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A normal trap */
#define TRAP(n, f)		 \
	Entry(f)		;\
	pushq	$0		;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define USER_TRAP TRAP

/* An interrupt */
#define INTERRUPT(n)		 \
	Entry(_intr_ ## n)	;\
	pushq	$0		;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)
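/*
 * For illustration, a stub generated by TRAP(n, f) expands (in the
 * non-DEBUG_IDT64 case) to:
 *
 *	Entry(f)
 *		pushq	$0			// dummy error code
 *		pushq	$(HNDL_ALLTRAPS)	// handler index
 *		pushq	$(n)			// vector number
 *		jmp	L_dispatch
 */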

/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
#define TRAP_IST1(n, f)
#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)

/* Begin double-mapped text section */
.section __HIB, __text
/* Generate all the stubs */
#include "idt_table.h"

Entry(idt64_page_fault)
	pushq	$(HNDL_ALLTRAPS)
	push	$(T_PAGE_FAULT)
	jmp	L_dispatch

Entry(idt64_debug)
	push	$0			/* error code */
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_DEBUG)
	jmp	L_dispatch
/*
 * Legacy interrupt-gate system call handlers.
 * These are entered via a syscall interrupt. The system call number in %rax
 * is saved to the error code slot in the stack frame. We then branch to the
 * common state saving code.
 */

#ifndef UNIX_INT
#error NO UNIX INT!!!
#endif
Entry(idt64_unix_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_UNIX_SCALL)
	pushq	$(UNIX_INT)
	jmp	L_dispatch

Entry(idt64_mach_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MACH_SCALL)
	pushq	$(MACH_INT)
	jmp	L_dispatch

Entry(idt64_mdep_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MDEP_SCALL)
	pushq	$(MACHDEP_INT)
	jmp	L_dispatch

/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack and we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_gen_prot)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_GENERAL_PROTECTION)
	jmp	L_dispatch

Entry(idt64_stack_fault)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_segnp)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_SEGMENT_NOT_PRESENT)
	jmp	L_dispatch

/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(idt64_db_task_stk_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_mc)
	push	$(0)			/* Error */
	pushq	$(HNDL_MACHINE_CHECK)
	pushq	$(T_MACHINE_CHECK)
	jmp	L_dispatch

/*
 * NMI
 * This may or may not be fatal, and extreme care is required
 * because it may fire while control is already in another trampoline.
 *
 * We get here on the IST2 stack, which is used exclusively for NMIs.
 * Machine checks, double faults and similar use IST1.
 */
Entry(idt64_nmi)
	/* Synthesize common interrupt stack frame */
	pushq	$0
	pushq	$(HNDL_ALLINTRS)
	pushq	$(T_NMI)
	/* Spill prior to RDMSR */
	push	%rax
	push	%rcx
	push	%rdx
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr				/* Check contents of GSBASE MSR */
	test	$0x80000000, %edx	/* MSB set? Already swapped to kernel's */
	jnz	44f
	swapgs				/* Either direct from user or within trampolines */
44:
	pop	%rdx
	pop	%rcx

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax		/* Offset of per-CPU shadow */
	mov	%gs:CPU_KERNEL_CR3(%rax), %rax
	mov	%rax, %cr3		/* Unconditionally switch to primary kernel pagetables */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*(%rax)
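/*
 * The indirect jump through idt64_hndl_table0[0] above transfers control
 * to ks_dispatch in the regular (non-doublemapped) kernel text.
 */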

Entry(idt64_double_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

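/*
 * Note: on SYSCALL entry the hardware has already placed the user return
 * RIP in %rcx and the user RFLAGS in %r11; no stack switch has occurred,
 * so %rsp still points at the user stack.
 */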
Entry(hi64_syscall)
Entry(idt64_syscall)
	swapgs
	/* Use RAX as a temporary by shifting its contents into R11[32:63].
	 * The system call number is defined to be a 32-bit quantity, as is
	 * RFLAGS.
	 */
	shlq	$32, %rax
	or	%rax, %r11
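	/* %r11 now packs the syscall number in bits 63:32 above the user
	 * flags in bits 31:0, freeing %rax as a scratch register */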
.globl EXT(dblsyscall_patch_point)
EXT(dblsyscall_patch_point):
//	movabsq	$0x12345678ABCDEFFFULL, %rax
	/* Generate offset to the double-mapped per-CPU data shadow
	 * into RAX
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax
	mov	%rsp, %gs:CPU_UBER_TMP(%rax)	/* save user stack */
	mov	%gs:CPU_ESTACK(%rax), %rsp	/* switch stack to per-cpu estack */
	sub	$(ISF64_SIZE), %rsp

	/*
	 * Synthesize an ISF frame on the exception stack
	 */
	movl	$(USER_DS), ISF64_SS(%rsp)
	mov	%rcx, ISF64_RIP(%rsp)		/* rip */

	mov	%gs:CPU_UBER_TMP(%rax), %rcx
	mov	%rcx, ISF64_RSP(%rsp)		/* user stack */

	mov	%r11, %rax
	shrq	$32, %rax			/* Restore RAX */
	mov	%r11d, %r11d			/* Clear r11[32:63] */

	mov	%r11, ISF64_RFLAGS(%rsp)	/* rflags */
	movl	$(SYSCALL_CS), ISF64_CS(%rsp)	/* cs - a pseudo-segment */
	mov	%rax, ISF64_ERR(%rsp)		/* err/rax - syscall code */
	movq	$(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
	movq	$(T_SYSCALL), ISF64_TRAPNO(%rsp)	/* trapno */
	swapgs
	jmp	L_dispatch			/* this can only be 64-bit */
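/*
 * Note: SYSENTER transfers control with kernel CS/SS and RIP/RSP taken
 * from the SYSENTER MSRs; by convention the user's return EIP arrives in
 * %rdx and its stack pointer in %rcx, which is why they are pushed as
 * "eip" and "uesp" below. SYSENTER also clears IF, hence the explicit
 * restore of EFL_IF into the synthesized frame.
 */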
Entry(hi64_sysenter)
Entry(idt64_sysenter)
	/* Synthesize an interrupt stack frame onto the
	 * exception stack.
	 */
	push	$(USER_DS)		/* ss */
	push	%rcx			/* uesp */
	pushf				/* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flag;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push	$0
	popf
	push	$(SYSENTER_CS)		/* cs */
L_sysenter_continue:
	push	%rdx			/* eip */
	push	%rax			/* err/eax - syscall code */
	pushq	$(HNDL_SYSENTER)
	pushq	$(T_SYSENTER)
	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp	L_dispatch

/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here with:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */

L_dispatch:
	pushq	%rax
	testb	$3, 8+ISF64_CS(%rsp)	/* "+8" for pushed %rax; CS RPL != 0 => from user */
	jz	1f
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax

	mov	%gs:CPU_TASK_CR3(%rax), %rax
	mov	%rax, %cr3
#if DEBUG
	mov	%rax, %gs:CPU_ENTRY_CR3
#endif
1:
	/* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	/* Indirect branch to non-doublemapped trampolines */
	jmp	*(%rax)
/* User return: register restoration and address space switch sequence */
Entry(ks_64bit_return)
	mov	R64_R14(%r15), %r14
	mov	R64_R13(%r15), %r13
	mov	R64_R12(%r15), %r12
	mov	R64_R11(%r15), %r11
	mov	R64_R10(%r15), %r10
	mov	R64_R9(%r15), %r9
	mov	R64_R8(%r15), %r8
	mov	R64_RSI(%r15), %rsi
	mov	R64_RDI(%r15), %rdi
	mov	R64_RBP(%r15), %rbp
	mov	R64_RDX(%r15), %rdx
	mov	R64_RCX(%r15), %rcx
	mov	R64_RBX(%r15), %rbx
	mov	R64_RAX(%r15), %rax
	/* Switch to per-CPU exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Synthesize interrupt stack frame from PCB savearea to exception stack */
	push	R64_SS(%r15)
	push	R64_RSP(%r15)
	push	R64_RFLAGS(%r15)
	push	R64_CS(%r15)
	push	R64_RIP(%r15)

	mov	R64_R15(%r15), %r15
	cmpq	$(KERNEL64_CS), 8(%rsp)	/* CS slot of the synthesized frame */
	jz	1f			/* returning to kernel: skip CR3 switch */
	/* Discover user cr3/ASID */
	push	%rax
	mov	%gs:CPU_UCR3, %rax
#if DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	pop	%rax
	swapgs
1:
	cmpl	$(SYSCALL_CS), 8(%rsp)	/* test for exit via SYSRET */
	je	L_sysret
EXT(ret64_iret):
	iretq				/* return from interrupt */
L_sysret:
	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 *	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	pop	%rsp
	sysretq				/* return from system call */
/* End of double-mapped TEXT */
.text

Entry(ks_dispatch)
	popq	%rax
	cmpl	$(KERNEL64_CS), ISF64_CS(%rsp)
	je	EXT(ks_dispatch_kernel)

	mov	%rax, %gs:CPU_UBER_TMP
	mov	%gs:CPU_UBER_ISF, %rax
	add	$(ISF64_SIZE), %rax

	xchg	%rsp, %rax
/* Memory to memory moves (ain't x86 wonderful):
 * Transfer the exception frame from the per-CPU exception stack to the
 * 'PCB' stack programmed at cswitch.
 */
	push	ISF64_SS(%rax)
	push	ISF64_RSP(%rax)
	push	ISF64_RFLAGS(%rax)
	push	ISF64_CS(%rax)
	push	ISF64_RIP(%rax)
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push	ISF64_TRAPNO(%rax)
	mov	%gs:CPU_UBER_TMP, %rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_return)
	jmp	.

Entry(ks_dispatch_user)
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je	L_dispatch_U32		/* 32-bit user task */

L_dispatch_U64:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	jmp	L_dispatch_64bit

Entry(ks_dispatch_kernel)
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl	$(SS_64), SS_FLAVOR(%r15)

	/*
	 * Save segment regs - for completeness since they're not used.
	 */
	movl	%fs, R64_FS(%r15)
	movl	%gs, R64_GS(%r15)

	/* Save general-purpose registers */
	mov	%rax, R64_RAX(%r15)
	mov	%rbx, R64_RBX(%r15)
	mov	%rcx, R64_RCX(%r15)
	mov	%rdx, R64_RDX(%r15)
	mov	%rbp, R64_RBP(%r15)
	mov	%rdi, R64_RDI(%r15)
	mov	%rsi, R64_RSI(%r15)
	mov	%r8, R64_R8(%r15)
	mov	%r9, R64_R9(%r15)
	mov	%r10, R64_R10(%r15)
	mov	%r11, R64_R11(%r15)
	mov	%r12, R64_R12(%r15)
	mov	%r13, R64_R13(%r15)
	mov	%r14, R64_R14(%r15)

	/* Zero unused GPRs. BX/DX/SI are clobbered elsewhere across the exception handler, and are skipped. */
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/* cr2 is significant only for page-faults */
	mov	%cr2, %rax
	mov	%rax, R64_CR2(%r15)

	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
	mov	R64_CS(%r15), %esi	/* %esi := cs for later */

	jmp	L_common_dispatch

L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	movq	$(HNDL_ALLTRAPS), ISF64_TRAPFN(%rsp)
	movq	$(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp)
	jmp	L_dispatch_U64

Entry(ks_32bit_entry_check)
	/*
	 * Check we're not a confused 64-bit user.
	 */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	jne	L_64bit_entry_reject
	/* fall through to 32-bit handler: */

L_dispatch_U32: /* 32-bit user task */
	subq	$(ISS64_OFFSET), %rsp
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	movl	$(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	movl	%ds, R32_DS(%r15)
	movl	%es, R32_ES(%r15)
	movl	%fs, R32_FS(%r15)
	movl	%gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov	%eax, R32_EAX(%r15)
	mov	%ebx, R32_EBX(%r15)
	mov	%ecx, R32_ECX(%r15)
	mov	%edx, R32_EDX(%r15)
	mov	%ebp, R32_EBP(%r15)
	mov	%esi, R32_ESI(%r15)
	mov	%edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	mov	%cr2, %rax
	mov	%eax, R32_CR2(%r15)
	/* Zero unused GPRs. BX/DX/SI/R15 are clobbered elsewhere across the exception handler, and are skipped. */
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/*
	 * Copy registers already saved in the machine state
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov	R64_RIP(%r15), %eax
	mov	%eax, R32_EIP(%r15)
	mov	R64_RFLAGS(%r15), %eax
	mov	%eax, R32_EFLAGS(%r15)
	mov	R64_RSP(%r15), %eax
	mov	%eax, R32_UESP(%r15)
	mov	R64_SS(%r15), %eax
	mov	%eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov	R64_CS(%r15), %esi	/* %esi := %cs for later */
	mov	%esi, R32_CS(%r15)
	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	%ebx, R32_TRAPNO(%r15)
	mov	R64_ERR(%r15), %eax
	mov	%eax, R32_ERR(%r15)
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */

L_common_dispatch:
	cld				/* Ensure the direction flag is clear in the kernel */
	cmpl	$0, EXT(pmap_smap_enabled)(%rip)
	je	1f
	clac				/* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * On entering the kernel, we typically don't switch CR3
	 * because the kernel shares the user's address space.
	 * But we mark the kernel's cr3 as "active" for TLB coherency evaluation.
	 * If, however, the CPU's invalid TLB flag is set, we have to invalidate the TLB
	 * since the kernel pagetables were changed while we were in userspace.
	 *
	 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
	 * we switch to the kernel's address space on entry. Also,
	 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov	%gs:CPU_KERNEL_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	test	$3, %esi		/* user/kernel? */
	jz	2f			/* skip cr3 reload from kernel */
	xor	%rbp, %rbp
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	11f
	cmpl	$0, EXT(no_shared_cr3)(%rip)
	je	2f
11:
	xor	%eax, %eax
	movw	%gs:CPU_KERNEL_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3		/* load kernel cr3 */
	jmp	4f			/* and skip tlb flush test */
2:
	mov	%gs:CPU_ACTIVE_CR3+4, %rcx
	shr	$32, %rcx
	testl	%ecx, %ecx
	jz	4f
	movl	$0, %gs:CPU_TLB_INVALID
	mov	%cr4, %rcx		/* RMW CR4, for lack of an alternative: */
	and	$(~CR4_PGE), %rcx	/* clearing and resetting CR4.PGE */
	mov	%rcx, %cr4		/* flushes the entire TLB, */
	or	$(CR4_PGE), %rcx	/* including global entries */
	mov	%rcx, %cr4
4:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
	testq	%rcx, %rcx
	je	5f
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap */
	cmpq	$0, TH_PCB_IDS(%rcx)	/* Is there a debug register state? */
	je	5f
	xor	%ecx, %ecx		/* If so, reset DR7 (the control) */
	mov	%rcx, %dr7
5:
	incl	%gs:hwIntCnt(,%ebx,4)	// Bump the trap/intr count
	/* Dispatch the designated handler */
	cmp	EXT(dblmap_base)(%rip), %rsp
	jb	66f
	cmp	EXT(dblmap_max)(%rip), %rsp
	jge	66f
	subq	EXT(dblmap_dist)(%rip), %rsp
	subq	EXT(dblmap_dist)(%rip), %r15
66:
	leaq	EXT(idt64_hndl_table1)(%rip), %rax
	jmp	*(%rax, %rdx, 8)	/* indexed dispatch: %rdx holds the HNDL_* index */

/*
 * Control is passed here to return to user.
 */
Entry(return_to_user)
	TIME_TRAP_UEXIT

Entry(ret_to_user)
// XXX Be nice to tidy up this debug register restore sequence...
	mov	%gs:CPU_ACTIVE_THREAD, %rdx
	movq	TH_PCB_IDS(%rdx), %rax	/* Obtain this thread's debug state */

	test	%rax, %rax		/* Is there a debug register context? */
	je	2f			/* branch if not */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP	/* Are we a 32-bit task? */
	jne	1f
	movl	DS_DR0(%rax), %ecx	/* If so, load the 32-bit DRs */
	movq	%rcx, %dr0
	movl	DS_DR1(%rax), %ecx
	movq	%rcx, %dr1
	movl	DS_DR2(%rax), %ecx
	movq	%rcx, %dr2
	movl	DS_DR3(%rax), %ecx
	movq	%rcx, %dr3
	movl	DS_DR7(%rax), %ecx
	movq	%rcx, %gs:CPU_DR7
	jmp	2f
1:
	mov	DS64_DR0(%rax), %rcx	/* Load the full-width DRs */
	mov	%rcx, %dr0
	mov	DS64_DR1(%rax), %rcx
	mov	%rcx, %dr1
	mov	DS64_DR2(%rax), %rcx
	mov	%rcx, %dr2
	mov	DS64_DR3(%rax), %rcx
	mov	%rcx, %dr3
	mov	DS64_DR7(%rax), %rcx
	mov	%rcx, %gs:CPU_DR7
2:
	/*
	 * On exiting the kernel there's typically no need to switch cr3 since we're
	 * already running in the user's address space, which includes the
	 * kernel. We now mark the task's cr3 as active, for TLB coherency.
	 * If the target address space has a pagezero mapping present, or
	 * if no_shared_cr3 is set, we do need to switch cr3 at this point.
	 */
	mov	%gs:CPU_TASK_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	L_cr3_switch_island
	movl	EXT(no_shared_cr3)(%rip), %eax
	test	%eax, %eax		/* -no_shared_cr3 */
	jnz	L_cr3_switch_island

L_cr3_switch_return:
	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register? */
	cmp	$0, %rax
	je	4f
	mov	%rax, %dr7		/* Set DR7 */
	movq	$0, %gs:CPU_DR7
4:
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	L_64bit_return

L_32bit_return:
#if DEBUG_IDT64
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit state? */
	je	1f
	cli
	POSTCODE2(0x6432)
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */

	/*
	 * Restore registers into the machine state for iret.
	 * Here on fault stack, with the PCB address in R15.
	 */
	movl	R32_EIP(%r15), %eax
	movl	%eax, R64_RIP(%r15)
	movl	R32_EFLAGS(%r15), %eax
	movl	%eax, R64_RFLAGS(%r15)
	movl	R32_CS(%r15), %eax
	movl	%eax, R64_CS(%r15)
	movl	R32_UESP(%r15), %eax
	movl	%eax, R64_RSP(%r15)
	movl	R32_SS(%r15), %eax
	movl	%eax, R64_SS(%r15)

	/* Validate DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER_DS and 0 */
	cmpl	$(USER_DS), R32_DS(%r15)
	jz	22f
	cmpl	$0, R32_DS(%r15)
	jz	22f
	larw	R32_DS(%r15), %ax
	jz	22f
	movl	$(USER_DS), R32_DS(%r15)
22:
	cmpl	$(USER_DS), R32_ES(%r15)
	jz	33f
	cmpl	$0, R32_ES(%r15)
	jz	33f
	larw	R32_ES(%r15), %ax
	jz	33f
	movl	$(USER_DS), R32_ES(%r15)
33:
	cmpl	$(USER_DS), R32_FS(%r15)
	jz	44f
	cmpl	$0, R32_FS(%r15)
	jz	44f
	larw	R32_FS(%r15), %ax
	jz	44f
	movl	$(USER_DS), R32_FS(%r15)
44:
	cmpl	$(USER_CTHREAD), R32_GS(%r15)
	jz	55f
	cmpl	$0, R32_GS(%r15)
	jz	55f
	larw	R32_GS(%r15), %ax
	jz	55f
	movl	$(USER_CTHREAD), R32_GS(%r15)
55:
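	/*
	 * LAR sets ZF when the selector is valid and accessible, so each
	 * "jz" above skips the sanitization; an invalid selector is replaced
	 * with a known-good one rather than faulting on the segment loads below.
	 */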
	/*
	 * Restore general 32-bit registers
	 */
	movl	R32_EAX(%r15), %eax
	movl	R32_EBX(%r15), %ebx
	movl	R32_ECX(%r15), %ecx
	movl	R32_EDX(%r15), %edx
	movl	R32_EBP(%r15), %ebp
	movl	R32_ESI(%r15), %esi
	movl	R32_EDI(%r15), %edi

	/*
	 * Restore segment registers. A segment exception taken here will
	 * push state on the IST1 stack and will not affect the "PCB stack".
	 */
	mov	%r15, %rsp		/* Set the PCB as the stack */
	swapgs

	/* Zero 64-bit-exclusive GPRs to prevent data leaks */
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14
	xor	%r15, %r15

EXT(ret32_set_ds):
	movw	R32_DS(%rsp), %ds
EXT(ret32_set_es):
	movw	R32_ES(%rsp), %es
EXT(ret32_set_fs):
	movw	R32_FS(%rsp), %fs
EXT(ret32_set_gs):
	movw	R32_GS(%rsp), %gs

	/* pop compat frame + trapno, trapfn and error */
	add	$(ISS64_OFFSET)+8+8+8, %rsp
	cmpl	$(SYSENTER_CS), ISF64_CS-8-8-8(%rsp)
					/* test for fast entry/exit */
	je	L_fast_exit
EXT(ret32_iret):
	iretq				/* return from interrupt */

L_fast_exit:
	pop	%rdx			/* user return eip */
	pop	%rcx			/* pop and toss cs */
	andl	$(~EFL_IF), (%rsp)	/* clear interrupt enable; sti below */
	popf				/* flags - carry denotes failure */
	pop	%rcx			/* user return esp */
	sti				/* interrupts enabled after sysexit */
	sysexitl			/* 32-bit sysexit */

L_cr3_switch_island:
	xor	%eax, %eax
	movw	%gs:CPU_ACTIVE_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3
	jmp	L_cr3_switch_return

ret_to_kernel:
#if DEBUG_IDT64
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	1f
	cli
	POSTCODE2(0x6464)
	CCALL1(panic_idt64, %r15)
	hlt
1:
	cmpl	$(KERNEL64_CS), R64_CS(%r15)
	je	2f
	CCALL1(panic_idt64, %r15)
	hlt
2:
#endif

L_64bit_return:
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*8(%rax)		/* idt64_hndl_table0[1]: ks_64bit_return */

Entry(ks_idt64_debug_kernel)
	/*
	 * trap came from kernel mode
	 */

	push	%rax			/* save %rax temporarily */
	lea	EXT(idt64_sysenter)(%rip), %rax
	cmp	%rax, ISF64_RIP+8(%rsp)
	pop	%rax
	jne	EXT(ks_dispatch_kernel)
	/*
	 * Interrupt stack frame has been pushed on the temporary stack.
	 * We have to switch to the PCB stack and patch up the saved state.
	 */
	mov	%rcx, ISF64_ERR(%rsp)	/* save %rcx in error slot */
	mov	ISF64_SS+8(%rsp), %rcx	/* top of temp stack -> pcb stack */
	xchg	%rcx, %rsp		/* switch to pcb stack */
	push	$(USER_DS)		/* ss */
	push	ISF64_ERR(%rcx)		/* saved %rcx into rsp slot */
	push	ISF64_RFLAGS(%rcx)	/* rflags */
	push	$(SYSENTER_TF_CS)	/* cs - not SYSENTER_CS for iret path */
	mov	ISF64_ERR(%rcx), %rcx	/* restore %rcx */
	jmp	L_sysenter_continue	/* continue sysenter entry */

Entry(ks_trap_check_kernel_exit)
	testb	$3, ISF64_CS(%rsp)
	jz	L_kernel_gpf

	/* Here for fault from user-space. Copy interrupt state to PCB. */
	swapgs
	push	%rax
	mov	%rcx, %gs:CPU_UBER_TMP	/* save user RCX */
	mov	%gs:CPU_UBER_ISF, %rcx	/* PCB stack addr */
	mov	ISF64_SS+8(%rsp), %rax
	mov	%rax, ISF64_SS(%rcx)
	mov	ISF64_RSP+8(%rsp), %rax
	mov	%rax, ISF64_RSP(%rcx)
	mov	ISF64_RFLAGS+8(%rsp), %rax
	mov	%rax, ISF64_RFLAGS(%rcx)
	mov	ISF64_CS+8(%rsp), %rax
	mov	%rax, ISF64_CS(%rcx)
	mov	ISF64_RIP+8(%rsp), %rax
	mov	%rax, ISF64_RIP(%rcx)
	mov	ISF64_ERR+8(%rsp), %rax
	mov	%rax, ISF64_ERR(%rcx)
	mov	ISF64_TRAPFN+8(%rsp), %rax
	mov	%rax, ISF64_TRAPFN(%rcx)
	mov	ISF64_TRAPNO+8(%rsp), %rax
	mov	%rax, ISF64_TRAPNO(%rcx)
	pop	%rax
	mov	%gs:CPU_UBER_TMP, %rsp	/* user RCX into RSP */
	xchg	%rcx, %rsp		/* to PCB stack with user RCX */
	jmp	EXT(ks_dispatch_user)

L_kernel_gpf:
	/* Here for GPF from kernel_space. Check for recoverable cases. */
	push	%rax
	leaq	EXT(ret32_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret64_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret32_set_ds)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_es)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_fs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_gs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	jmp	EXT(ks_kernel_trap)
	/* Fall through */


Entry(ks_kernel_trap)
	/*
	 * Here after taking an unexpected trap from kernel mode - perhaps
	 * while running in the trampolines hereabouts.
	 * Note: %rax has been pushed on stack.
	 * Make sure we're not on the PCB stack; if so, move to the kernel stack.
	 * This is likely a fatal condition.
	 * But first, ensure we have the kernel gs base active...
	 */
	push	%rcx
	push	%rdx
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr				/* read kernel gsbase */
	test	$0x80000000, %edx	/* test MSB of address */
	jne	1f
	swapgs				/* so swap */
1:
	pop	%rdx
	pop	%rcx

	movq	%gs:CPU_UBER_ISF, %rax	/* PCB stack addr */
	subq	%rsp, %rax
	cmpq	$(PAGE_SIZE), %rax	/* current stack in PCB? */
	jb	2f			/* - yes, deal with it */
	pop	%rax			/* - no, restore %rax */
	jmp	EXT(ks_dispatch_kernel)
2:
	/*
	 * Here if %rsp is in the PCB
	 * Copy the interrupt stack frame from PCB stack to kernel stack
	 */
	movq	%gs:CPU_KERNEL_STACK, %rax
	xchgq	%rax, %rsp
	pushq	8+ISF64_SS(%rax)
	pushq	8+ISF64_RSP(%rax)
	pushq	8+ISF64_RFLAGS(%rax)
	pushq	8+ISF64_CS(%rax)
	pushq	8+ISF64_RIP(%rax)
	pushq	8+ISF64_ERR(%rax)
	pushq	8+ISF64_TRAPFN(%rax)
	pushq	8+ISF64_TRAPNO(%rax)
	movq	(%rax), %rax
	jmp	EXT(ks_dispatch_kernel)


/*
 * GP/NP fault on IRET: CS or SS is in error.
 * User GSBASE is active.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *  0	ISF64_TRAPNO:	trap code (NP or GP)
 *  8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	ISF64_RIP:	kernel RIP
 *  32	ISF64_CS:	kernel CS
 *  40	ISF64_RFLAGS:	kernel RFLAGS
 *  48	ISF64_RSP:	kernel RSP
 *  56	ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *  0	user RIP
 *  8	user CS
 *  16	user RFLAGS
 *  24	user RSP
 *  32	user SS
 *
 * We need to move the kernel's TRAPNO, TRAPFN and ERR to the PCB and handle
 * as a user fault with:
 *  0	ISF64_TRAPNO:	trap code (NP or GP)
 *  8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	user RIP
 *  32	user CS
 *  40	user RFLAGS
 *  48	user RSP
 *  56	user SS
 */
L_fault_iret:
	pop	%rax			/* recover saved %rax */
	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don't need saved rip) */
	mov	ISF64_RSP(%rsp), %rax
	xchg	%rax, %rsp		/* switch to PCB stack */
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push	ISF64_TRAPNO(%rax)
	mov	ISF64_RIP(%rax), %rax	/* restore rax */
	/* now treat as fault from user */
	jmp	L_dispatch

/*
 * Fault restoring a segment register. All of the saved state is still
 * on the stack untouched since we haven't yet moved the stack pointer.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *  0	ISF64_TRAPNO:	trap code (NP or GP)
 *  8	ISF64_TRAPFN:	trap function
 *  16	ISF64_ERR:	segment number in error (error code)
 *  24	ISF64_RIP:	kernel RIP
 *  32	ISF64_CS:	kernel CS
 *  40	ISF64_RFLAGS:	kernel RFLAGS
 *  48	ISF64_RSP:	kernel RSP
 *  56	ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *  0	user trap code
 *  8	user trap function
 *  16	user err
 *  24	user RIP
 *  32	user CS
 *  40	user RFLAGS
 *  48	user RSP
 *  56	user SS
 */
L_32bit_fault_set_seg:
	swapgs
	pop	%rax			/* toss saved %rax from stack */
	mov	ISF64_TRAPNO(%rsp), %rax
	mov	ISF64_TRAPFN(%rsp), %rcx
	mov	ISF64_ERR(%rsp), %rdx
	mov	ISF64_RSP(%rsp), %rsp	/* reset stack to saved state */
	mov	%rax, R64_TRAPNO(%rsp)
	mov	%rcx, R64_TRAPFN(%rsp)
	mov	%rdx, R64_ERR(%rsp)
	/* now treat as fault from user, except that all the state is
	 * already saved - we just have to move the trapno and error
	 * into the compatibility frame */
	jmp	L_dispatch_U32_after_fault


Entry(ks_idt64_nmi_kernel)
	/* From user-space: copy interrupt state to user PCB */
	swapgs
	mov	%gs:CPU_UBER_ISF, %rcx	/* PCB stack addr */
	add	$(ISF64_SIZE), %rcx	/* adjust to base of ISF */
	swapgs				/* swap back for L_dispatch */
	jmp	4f			/* Copy state to PCB */

1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS is set.
	 * Set the kernel and ensure that we'll swap back correctly at IRET.
	 */
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr				/* read kernel gsbase */
	test	$0x80000000, %edx	/* test MSB of address */
	jne	2f
	swapgs				/* so swap */
	movl	$1, ISF64_CS+4(%rsp)	/* and set flag in CS slot */
2:
	/*
	 * Determine whether we're on the kernel or interrupt stack
	 * when the NMI hit.
	 */
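	/*
	 * The xor/and sequences below test stack membership: if
	 * (RSP ^ stack_base) & kernel_stack_mask is zero, RSP lies within
	 * that stack's aligned region.
	 */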
	mov	ISF64_RSP(%rsp), %rcx
	mov	%gs:CPU_KERNEL_STACK, %rax
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax		/* are we on the kernel stack? */
	je	3f			/* yes */

	mov	%gs:CPU_INT_STACK_TOP, %rax
	dec	%rax			/* intr stack top is byte above max */
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax		/* are we on the interrupt stack? */
	je	3f			/* yes */

	mov	%gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and	$0xFFFFFFFFFFFFFFF0, %rcx

4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 *	ISF64_ERR(RSP)    saved RAX
	 *	ISF64_TRAPFN(RSP) saved RCX
	 *	ISF64_TRAPNO(RSP) saved RDX
	 */
	xchg	%rsp, %rcx		/* set for pushes */
	push	ISF64_SS(%rcx)
	push	ISF64_RSP(%rcx)
	push	ISF64_RFLAGS(%rcx)
	push	ISF64_CS(%rcx)
	push	ISF64_RIP(%rcx)
	push	$(0)			/* error code 0 */
	push	$(HNDL_ALLINTRS)	/* trapfn allintrs */
	push	$(T_NMI)		/* trapno T_NMI */
	mov	ISF64_ERR(%rcx), %rax
	mov	ISF64_TRAPNO(%rcx), %rdx
	mov	ISF64_TRAPFN(%rcx), %rcx
	jmp	L_dispatch


/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 *
 * The rest of the state is set up as:
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov	%esi, %eax
	testb	$3, %al
	jz	trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap/exception */
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15)		/* call user trap routine */
					/* user_trap() unmasks interrupts */
	cli				/* hold off intrs - critical section */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */

Entry(return_from_trap)
	movq	%gs:CPU_ACTIVE_THREAD, %r15	/* Get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%r15)	/* Reset IO tier override to -1 before returning to userspace */
	cmpl	$0, TH_RWLOCK_COUNT(%r15)	/* Check if current thread has pending RW locks held */
	jz	1f
	xorq	%rbp, %rbp		/* clear framepointer */
	mov	%r15, %rdi		/* Set RDI to current thread */
	CCALL(lck_rw_clear_promotions_x86)	/* Clear promotions if needed */
1:
	movq	TH_PCB_ISS(%r15), %r15	/* PCB stack */
	movl	%gs:CPU_PENDING_AST, %eax
	testl	%eax, %eax
	je	EXT(return_to_user)	/* branch if no AST */

L_return_from_trap_with_ast:
	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
	je	2f			/* no, go handle the AST */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* are we a 64-bit task? */
	je	1f
					/* no... 32-bit user mode */
	movl	R32_EIP(%r15), %edi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R32_EBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
1:
	movq	R64_RIP(%r15), %rdi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R64_RBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
2:
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(ast_taken_user)		/* handle all ASTs (enables interrupts, may return via continuation) */

	cli
	mov	%rsp, %r15		/* AST changes stack, saved state */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
	jmp	EXT(return_from_trap)	/* and check again (rare) */

/*
 * Trap from kernel mode. No need to switch stacks.
 * Interrupts must be off here - we will restore them to their state at
 * the time of the trap as soon as it's safe for us to do so and not
 * recurse doing preemption.
 */
trap_from_kernel:
	movq	%r15, %rdi		/* saved state addr */
	pushq	R64_RIP(%r15)		/* Simulate a CALL from fault point */
	pushq	%rbp			/* Extend framepointer chain */
	movq	%rsp, %rbp
	CCALLWITHSP(kernel_trap)	/* to kernel trap routine */
	popq	%rbp
	addq	$8, %rsp
	mov	%rsp, %r15		/* DTrace slides stack/saved-state */
	cli

	movl	%gs:CPU_PENDING_AST, %eax	/* get pending asts */
	testl	$(AST_URGENT), %eax	/* any urgent preemption? */
	je	ret_to_kernel		/* no, nothing to do */
	cmpl	$(T_PREEMPT), R64_TRAPNO(%r15)
	je	ret_to_kernel		/* T_PREEMPT handled in kernel_trap() */
	testl	$(EFL_IF), R64_RFLAGS(%r15)	/* interrupts disabled? */
	je	ret_to_kernel
	cmpl	$0, %gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel
	movq	%gs:CPU_KERNEL_STACK, %rax
	movq	%rsp, %rcx
	xorq	%rax, %rcx
	andq	EXT(kernel_stack_mask)(%rip), %rcx
	testq	%rcx, %rcx		/* are we on the kernel stack? */
	jne	ret_to_kernel		/* no, skip it */

	CCALL(ast_taken_kernel)		/* take the AST */

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel


/*
 * All interrupts on all tasks enter here with:
 *	r15	x86_saved_state_t
 *	rsp	kernel or interrupt stack
 *	esi	cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)
	/*
	 * test whether already on interrupt stack
	 */
	movq	%gs:CPU_INT_STACK_TOP, %rcx
	cmpq	%rsp, %rcx
	jb	1f
	leaq	-INTSTACK_SIZE(%rcx), %rdx
	cmpq	%rsp, %rdx
	jb	int_from_intstack
1:
	xchgq	%rcx, %rsp		/* switch to interrupt stack */

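	/*
	 * Set CR0.TS so that any floating-point use at interrupt level
	 * traps, keeping the interrupted thread's FP state intact; it is
	 * conditionally cleared again on the return path below.
	 */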
	mov	%cr0, %rax		/* get cr0 */
	orl	$(CR0_TS), %eax		/* or in TS bit */
	mov	%rax, %cr0		/* set cr0 */

	pushq	%rcx			/* save pointer to old stack */
	pushq	%gs:CPU_INT_STATE	/* save previous intr state */
	movq	%r15, %gs:CPU_INT_STATE	/* set intr state */

	TIME_INT_ENTRY			/* do timing */

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15)		/* call generic interrupt routine */

.globl EXT(return_to_iret)
LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL

	TIME_INT_EXIT			/* do timing */

	popq	%gs:CPU_INT_STATE	/* reset/clear intr state pointer */
	popq	%rsp			/* switch back to old stack */

	movq	%gs:CPU_ACTIVE_THREAD, %rax
	movq	TH_PCB_FPS(%rax), %rax	/* get pcb's ifps */
	cmpq	$0, %rax		/* Is there a context */
	je	1f			/* Branch if not */
	movl	FP_VALID(%rax), %eax	/* Load fp_valid */
	cmpl	$0, %eax		/* Check if valid */
	jne	1f			/* Branch if valid */
	clts				/* Clear TS */
	jmp	2f
1:
	mov	%cr0, %rax		/* get cr0 */
	orl	$(CR0_TS), %eax		/* or in TS bit */
	mov	%rax, %cr0		/* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl	R32_CS(%r15), %eax	/* assume 32-bit state */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit? */
#if DEBUG_IDT64
	jne	4f
	movl	R64_CS(%r15), %eax	/* 64-bit user mode */
	jmp	3f
4:
	cmpl	$(SS_32), SS_FLAVOR(%r15)
	je	3f
	POSTCODE2(0x6431)
	CCALL1(panic_idt64, %r15)
	hlt
#else
	jne	3f
	movl	R64_CS(%r15), %eax	/* 64-bit user mode */
#endif
3:
	testb	$3, %al			/* user mode, */
	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
	/*
	 * we only want to handle preemption requests if
	 * the interrupt fell in the kernel context
	 * and preemption isn't disabled
	 */
	movl	%gs:CPU_PENDING_AST, %eax
	testl	$(AST_URGENT), %eax	/* any urgent requests? */
	je	ret_to_kernel		/* no, nothing to do */

	cmpl	$0, %gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel		/* yes, skip it */

	/*
	 * Take an AST from kernel space. We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL(ast_taken_kernel)

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel


/*
 * nested int - simple path, can't preempt etc on way out
 */
int_from_intstack:
	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL
	incl	%gs:CPU_NESTED_ISTACK

	push	%gs:CPU_INT_STATE
	mov	%r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop	%gs:CPU_INT_STATE

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL
	decl	%gs:CPU_NESTED_ISTACK

	jmp	ret_to_kernel

/*
 * Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl	%gs:CPU_PENDING_AST, %eax
	testl	%eax, %eax		/* pending ASTs? */
	je	EXT(ret_to_user)	/* no, nothing to do */

	TIME_TRAP_UENTRY

	movl	$1, %ecx		/* check if we're in the PFZ */
	jmp	L_return_from_trap_with_ast	/* return */


/* Syscall dispatch routines! */

/*
 * 32-bit tasks
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	x86_saved_state32_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
	movl	R32_EAX(%r15), %eax
	testl	%eax, %eax
	js	EXT(hndl_mach_scall)	/* < 0 => mach */
					/* > 0 => unix */

Entry(hndl_unix_scall)

	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */
	incl	TH_SYSCALLS_UNIX(%rcx)	/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */
	incl	TH_SYSCALLS_MACH(%rcx)	/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */

/*
 * 64-bit tasks
 * System call entries via syscall only:
 *
 *	r15	x86_saved_state64_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling syscall */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	/*
	 * We can be here either for a mach, unix, machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
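	/*
	 * The class is encoded in the high bits of the 32-bit syscall
	 * number (see SYSCALL_CLASS_MASK/SYSCALL_CLASS_SHIFT in
	 * mach/i386/syscall_sw.h, included above); each comparison below
	 * checks the masked class against a pre-shifted class constant.
	 */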
	movl	R64_RAX(%r15), %eax	/* syscall number/class */
	movl	%eax, %edx
	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mach_scall64)
	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_unix_scall64)
	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mdep_scall64)
	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_diag_scall64)

	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
	/* no return */


Entry(hndl_unix_scall64)
	incl	TH_SYSCALLS_UNIX(%rcx)	/* increment call count */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall64)
	incl	TH_SYSCALLS_MACH(%rcx)	/* increment call count */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15)	// Call diagnostics
	test	%eax, %eax		// What kind of return is this?
	je	1f			// - branch if bad (zero)
	jmp	EXT(return_to_user)	// Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
	/* no return */
/* TODO: assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
Entry(hndl_machine_check)
	/* Adjust SP and savearea to their canonical, non-aliased addresses */
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	CCALL1(panic_double_fault64, %r15)
	hlt