/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 * Hardware trap/fault handler.
 */

#include <mach_kdp.h>
#include <mach_ldebug.h>

#include <types.h>
#include <i386/eflags.h>
#include <i386/trap.h>
#include <i386/pmap.h>
#include <i386/fpu.h>
#include <i386/panic_notify.h>
#include <i386/lapic.h>

#include <mach/exception.h>
#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <mach/i386/thread_status.h>

#include <vm/vm_kern.h>
#include <vm/vm_fault.h>

#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/task.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/exception.h>
#include <kern/spl.h>
#include <kern/misc_protos.h>
#include <kern/debug.h>
#if CONFIG_TELEMETRY
#include <kern/telemetry.h>
#endif
#include <sys/kdebug.h>
#include <kperf/kperf.h>
#include <prng/random.h>
#include <prng/entropy.h>

#include <string.h>

#include <i386/postcode.h>
#include <i386/mp_desc.h>
#include <i386/proc_reg.h>
#include <i386/machine_routines.h>
#if CONFIG_MCA
#include <i386/machine_check.h>
#endif
#include <mach/i386/syscall_sw.h>

#include <libkern/OSDebug.h>
#include <i386/cpu_threads.h>
#include <machine/pal_routines.h>

extern void throttle_lowpri_io(int);
extern void kprint_state(x86_saved_state64_t *saved_state);
#if DEVELOPMENT || DEBUG
int insnstream_force_cacheline_mismatch = 0;
extern int panic_on_cacheline_mismatch;
extern char panic_on_trap_procname[];
extern uint32_t panic_on_trap_mask;
#endif

extern int insn_copyin_count;

/*
 * Forward declarations
 */
static void panic_trap(x86_saved_state64_t *saved_state, uint32_t pl, kern_return_t fault_result) __dead2;
static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip);
#if DEVELOPMENT || DEBUG
static __attribute__((noinline)) void copy_instruction_stream(thread_t thread, uint64_t rip, int trap_code, bool inspect_cacheline);
#else
static __attribute__((noinline)) void copy_instruction_stream(thread_t thread, uint64_t rip, int trap_code);
#endif

#if CONFIG_DTRACE
/* See <rdar://problem/4613924> */
perfCallback tempDTraceTrapHook = NULL; /* Pointer to DTrace fbt trap hook routine */

extern boolean_t dtrace_tally_fault(user_addr_t);
extern boolean_t dtrace_handle_trap(int, x86_saved_state_t *);
#endif

#ifdef MACH_BSD
extern char * proc_name_address(void *p);
#endif /* MACH_BSD */

extern boolean_t pmap_smep_enabled;
extern boolean_t pmap_smap_enabled;

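/*
 * thread_syscall_return: deliver a Mach or Unix syscall return value to
 * the current user thread and exit to user space via
 * thread_exception_return(); this routine does not return to its caller.
 */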
__attribute__((noreturn))
void
thread_syscall_return(
	kern_return_t ret)
{
	thread_t thr_act = current_thread();
	boolean_t is_mach;
	int code;

	pal_register_cache_state(thr_act, DIRTY);

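	/*
	 * Classify the completing syscall so the matching kdebug end-event
	 * can be emitted: the 64-bit ABI encodes the syscall class in the
	 * high bits of %rax, while the 32-bit ABI marks Mach traps with
	 * negative syscall numbers.
	 */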
	if (thread_is_64bit_addr(thr_act)) {
		x86_saved_state64_t *regs;

		regs = USER_REGS64(thr_act);

		code = (int) (regs->rax & SYSCALL_NUMBER_MASK);
		is_mach = (regs->rax & SYSCALL_CLASS_MASK)
		    == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT);
		if (kdebug_enable && is_mach) {
			/* Mach trap */
			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    MACHDBG_CODE(DBG_MACH_EXCP_SC, code) | DBG_FUNC_END,
			    ret, 0, 0, 0, 0);
		}
		regs->rax = ret;
#if DEBUG
		if (is_mach) {
			DEBUG_KPRINT_SYSCALL_MACH(
				"thread_syscall_return: 64-bit mach ret=%u\n",
				ret);
		} else {
			DEBUG_KPRINT_SYSCALL_UNIX(
				"thread_syscall_return: 64-bit unix ret=%u\n",
				ret);
		}
#endif
	} else {
		x86_saved_state32_t *regs;

		regs = USER_REGS32(thr_act);

		code = ((int) regs->eax);
		is_mach = (code < 0);
		if (kdebug_enable && is_mach) {
			/* Mach trap */
			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    MACHDBG_CODE(DBG_MACH_EXCP_SC, -code) | DBG_FUNC_END,
			    ret, 0, 0, 0, 0);
		}
		regs->eax = ret;
#if DEBUG
		if (is_mach) {
			DEBUG_KPRINT_SYSCALL_MACH(
				"thread_syscall_return: 32-bit mach ret=%u\n",
				ret);
		} else {
			DEBUG_KPRINT_SYSCALL_UNIX(
				"thread_syscall_return: 32-bit unix ret=%u\n",
				ret);
		}
#endif
	}

#if DEBUG || DEVELOPMENT
	kern_allocation_name_t
	prior __assert_only = thread_get_kernel_state(thr_act)->allocation_name;
	assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
#endif /* DEBUG || DEVELOPMENT */

	throttle_lowpri_io(1);

	thread_exception_return();
	/*NOTREACHED*/
}

/*
 * Fault recovery in copyin/copyout routines.
 */
struct recovery {
	uintptr_t fault_addr;
	uintptr_t recover_addr;
};

extern struct recovery recover_table[];
extern struct recovery recover_table_end[];
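/*
 * Each entry pairs the address of an instruction that is permitted to
 * fault with the address at which to resume; kernel_trap() scans this
 * table (and the per-thread `recover' field) before treating a kernel
 * fault as fatal.
 */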

const char * trap_type[] = {TRAP_NAMES};
unsigned TRAP_TYPES = sizeof(trap_type) / sizeof(trap_type[0]);

extern void PE_incoming_interrupt(int interrupt);

#if defined(__x86_64__) && DEBUG
void
kprint_state(x86_saved_state64_t *saved_state)
{
	kprintf("current_cpu_datap() 0x%lx\n", (uintptr_t)current_cpu_datap());
	kprintf("Current GS base MSR 0x%llx\n", rdmsr64(MSR_IA32_GS_BASE));
	kprintf("Kernel GS base MSR 0x%llx\n", rdmsr64(MSR_IA32_KERNEL_GS_BASE));
	kprintf("state at 0x%lx:\n", (uintptr_t) saved_state);

	kprintf("      rdi    0x%llx\n", saved_state->rdi);
	kprintf("      rsi    0x%llx\n", saved_state->rsi);
	kprintf("      rdx    0x%llx\n", saved_state->rdx);
	kprintf("      r10    0x%llx\n", saved_state->r10);
	kprintf("      r8     0x%llx\n", saved_state->r8);
	kprintf("      r9     0x%llx\n", saved_state->r9);

	kprintf("      cr2    0x%llx\n", saved_state->cr2);
	kprintf("real  cr2    0x%lx\n", get_cr2());
	kprintf("      r15    0x%llx\n", saved_state->r15);
	kprintf("      r14    0x%llx\n", saved_state->r14);
	kprintf("      r13    0x%llx\n", saved_state->r13);
	kprintf("      r12    0x%llx\n", saved_state->r12);
	kprintf("      r11    0x%llx\n", saved_state->r11);
	kprintf("      rbp    0x%llx\n", saved_state->rbp);
	kprintf("      rbx    0x%llx\n", saved_state->rbx);
	kprintf("      rcx    0x%llx\n", saved_state->rcx);
	kprintf("      rax    0x%llx\n", saved_state->rax);

	kprintf("      gs     0x%x\n", saved_state->gs);
	kprintf("      fs     0x%x\n", saved_state->fs);

	kprintf("  isf.trapno 0x%x\n", saved_state->isf.trapno);
	kprintf("  isf._pad   0x%x\n", saved_state->isf._pad);
	kprintf("  isf.trapfn 0x%llx\n", saved_state->isf.trapfn);
	kprintf("  isf.err    0x%llx\n", saved_state->isf.err);
	kprintf("  isf.rip    0x%llx\n", saved_state->isf.rip);
	kprintf("  isf.cs     0x%llx\n", saved_state->isf.cs);
	kprintf("  isf.rflags 0x%llx\n", saved_state->isf.rflags);
	kprintf("  isf.rsp    0x%llx\n", saved_state->isf.rsp);
	kprintf("  isf.ss     0x%llx\n", saved_state->isf.ss);
}
#endif


/*
 * A non-zero value indicates that the interrupt latency assert is
 * enabled, with the cap expressed in absolute time units.
 */

uint64_t interrupt_latency_cap = 0;
boolean_t ilat_assert = FALSE;

void
interrupt_latency_tracker_setup(void)
{
	uint32_t ilat_cap_us;
	if (PE_parse_boot_argn("interrupt_latency_cap_us", &ilat_cap_us, sizeof(ilat_cap_us))) {
		interrupt_latency_cap = ilat_cap_us * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(interrupt_latency_cap, &interrupt_latency_cap);
	} else {
		interrupt_latency_cap = LockTimeOut;
	}
	PE_parse_boot_argn("-interrupt_latency_assert_enable", &ilat_assert, sizeof(ilat_assert));
}

void
interrupt_reset_latency_stats(void)
{
	uint32_t i;
	for (i = 0; i < real_ncpus; i++) {
		cpu_data_ptr[i]->cpu_max_observed_int_latency =
		    cpu_data_ptr[i]->cpu_max_observed_int_latency_vector = 0;
	}
}

void
interrupt_populate_latency_stats(char *buf, unsigned bufsize)
{
	uint32_t i, tcpu = ~0;
	uint64_t cur_max = 0;

	for (i = 0; i < real_ncpus; i++) {
		if (cur_max < cpu_data_ptr[i]->cpu_max_observed_int_latency) {
			cur_max = cpu_data_ptr[i]->cpu_max_observed_int_latency;
			tcpu = i;
		}
	}

	if (tcpu < real_ncpus) {
		snprintf(buf, bufsize, "0x%x 0x%x 0x%llx", tcpu, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency_vector, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency);
	}
}

uint32_t interrupt_timer_coalescing_enabled = 1;
uint64_t interrupt_coalesced_timers;

/*
 * Handle interrupts:
 *  - local APIC interrupts (IPIs, timers, etc) are handled by the kernel,
 *  - device interrupts go to the platform expert.
 */
void
interrupt(x86_saved_state_t *state)
{
	uint64_t rip;
	uint64_t rsp;
	int interrupt_num;
	boolean_t user_mode = FALSE;
	int ipl;
	int cnum = cpu_number();
	cpu_data_t *cdp = cpu_data_ptr[cnum];
	int itype = DBG_INTR_TYPE_UNKNOWN;
	int handled;

	x86_saved_state64_t *state64 = saved_state64(state);
	rip = state64->isf.rip;
	rsp = state64->isf.rsp;
	interrupt_num = state64->isf.trapno;
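	/*
	 * The low two bits of the saved CS selector hold the privilege
	 * level at the time of the interrupt; nonzero means we
	 * interrupted user-mode execution.
	 */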
	if (state64->isf.cs & 0x03) {
		user_mode = TRUE;
	}

#if DEVELOPMENT || DEBUG
	uint64_t frameptr = is_saved_state64(state) ? state64->rbp : saved_state32(state)->ebp;
	uint32_t traptrace_index = traptrace_start(interrupt_num, rip, mach_absolute_time(), frameptr);
#endif

	if (cpu_data_ptr[cnum]->lcpu.package->num_idle == topoParms.nLThreadsPerPackage) {
		cpu_data_ptr[cnum]->cpu_hwIntpexits[interrupt_num]++;
	}

	if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_INTERPROCESSOR_INTERRUPT)) {
		itype = DBG_INTR_TYPE_IPI;
	} else if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT)) {
		itype = DBG_INTR_TYPE_TIMER;
	} else {
		itype = DBG_INTR_TYPE_OTHER;
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START,
	    interrupt_num,
	    (user_mode ? rip : VM_KERNEL_UNSLIDE(rip)),
	    user_mode, itype, 0);

	SCHED_STATS_INC(interrupt_count);

#if CONFIG_TELEMETRY
	if (telemetry_needs_record) {
		telemetry_mark_curthread(user_mode, FALSE);
	}
#endif

	ipl = get_preemption_level();

	/*
	 * Handle local APIC interrupts
	 * else call platform expert for devices.
	 */
	handled = lapic_interrupt(interrupt_num, state);

	if (!handled) {
		if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_CMCI_INTERRUPT)) {
			/*
			 * CMCI can be signalled on any logical processor, and the kexts
			 * that implement handling CMCI use IOKit to register handlers for
			 * the CMCI vector, so if we see a CMCI, do not encode a CPU
			 * number in bits 8:31 (since the vector is the same regardless of
			 * the handling CPU).
			 */
			PE_incoming_interrupt(interrupt_num);
		} else if (cnum <= lapic_max_interrupt_cpunum) {
			PE_incoming_interrupt((cnum << 8) | interrupt_num);
		}
	}

	if (__improbable(get_preemption_level() != ipl)) {
		panic("Preemption level altered by interrupt vector 0x%x: initial 0x%x, final: 0x%x\n", interrupt_num, ipl, get_preemption_level());
	}


	if (__improbable(cdp->cpu_nested_istack)) {
		cdp->cpu_nested_istack_events++;
	} else {
		uint64_t ctime = mach_absolute_time();
		uint64_t int_latency = ctime - cdp->cpu_int_event_time;
		uint64_t esdeadline, ehdeadline;
		/* Attempt to process deferred timers in the context of
		 * this interrupt, unless interrupt time has already exceeded
		 * TCOAL_ILAT_THRESHOLD.
		 */
#define TCOAL_ILAT_THRESHOLD (30000ULL)

		if ((int_latency < TCOAL_ILAT_THRESHOLD) &&
		    interrupt_timer_coalescing_enabled) {
			esdeadline = cdp->rtclock_timer.queue.earliest_soft_deadline;
			ehdeadline = cdp->rtclock_timer.deadline;
			if ((ctime >= esdeadline) && (ctime < ehdeadline)) {
				interrupt_coalesced_timers++;
				TCOAL_DEBUG(0x88880000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, interrupt_coalesced_timers, 0);
				rtclock_intr(state);
				TCOAL_DEBUG(0x88880000 | DBG_FUNC_END, ctime, esdeadline, interrupt_coalesced_timers, 0, 0);
			} else {
				TCOAL_DEBUG(0x77770000, ctime, cdp->rtclock_timer.queue.earliest_soft_deadline, cdp->rtclock_timer.deadline, interrupt_coalesced_timers, 0);
			}
		}

		if (__improbable(ilat_assert && (int_latency > interrupt_latency_cap) && !machine_timeout_suspended())) {
			panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x, current signals: 0x%x", interrupt_num, int_latency, cdp->cpu_prior_signals, cdp->cpu_signals);
		}

		if (__improbable(int_latency > cdp->cpu_max_observed_int_latency)) {
			cdp->cpu_max_observed_int_latency = int_latency;
			cdp->cpu_max_observed_int_latency_vector = interrupt_num;
		}
	}

	/*
	 * Having serviced the interrupt first, look at the interrupted stack depth.
	 */
	if (!user_mode) {
		uint64_t depth = cdp->cpu_kernel_stack
		    + sizeof(struct thread_kernel_state)
		    + sizeof(struct i386_exception_link *)
		    - rsp;
		if (__improbable(depth > kernel_stack_depth_max)) {
			kernel_stack_depth_max = (vm_offset_t)depth;
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DEPTH),
				(long) depth, (long) VM_KERNEL_UNSLIDE(rip), 0, 0, 0);
		}
	}

	if (cnum == master_cpu) {
		entropy_collect();
	}

#if KPERF
	kperf_interrupt();
#endif /* KPERF */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END,
	    interrupt_num);

	assert(ml_get_interrupts_enabled() == FALSE);

#if DEVELOPMENT || DEBUG
	if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
		traptrace_end(traptrace_index, mach_absolute_time());
	}
#endif
}

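/*
 * Note: DR7 bit 10 is architecturally reserved and always reads as 1,
 * so 0x400 is the "all debug breakpoints disabled" reset value.
 */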
static inline void
reset_dr7(void)
{
	long dr7 = 0x400; /* magic dr7 reset value; 32 bit on i386, 64 bit on x86_64 */
	__asm__ volatile ("mov %0,%%dr7" : : "r" (dr7));
}
#if MACH_KDP
unsigned kdp_has_active_watchpoints = 0;
#define NO_WATCHPOINTS (!kdp_has_active_watchpoints)
#else
#define NO_WATCHPOINTS 1
#endif
/*
 * Trap from kernel mode.  Only page-fault errors are recoverable,
 * and then only in special circumstances.  All other errors are
 * fatal.
 */

void
kernel_trap(
	x86_saved_state_t *state,
	uintptr_t *lo_spp)
{
	x86_saved_state64_t *saved_state;
	int code;
	user_addr_t vaddr;
	int type;
	vm_map_t map = 0; /* protected by T_PAGE_FAULT */
	kern_return_t result = KERN_FAILURE;
	kern_return_t fault_result = KERN_SUCCESS;
	thread_t thread;
	boolean_t intr;
	vm_prot_t prot;
	struct recovery *rp;
	vm_offset_t kern_ip;
	int is_user;
	int trap_pl = get_preemption_level();

	thread = current_thread();

	if (__improbable(is_saved_state32(state))) {
		panic("kernel_trap(%p) with 32-bit state", state);
	}
	saved_state = saved_state64(state);

	/* Record cpu where state was captured */
	saved_state->isf.cpu = cpu_number();

	vaddr = (user_addr_t)saved_state->cr2;
	type = saved_state->isf.trapno;
	code = (int)(saved_state->isf.err & 0xffff);
	intr = (saved_state->isf.rflags & EFL_IF) != 0; /* state of ints at trap */
	kern_ip = (vm_offset_t)saved_state->isf.rip;

	is_user = (vaddr < VM_MAX_USER_PAGE_ADDRESS);

#if DEVELOPMENT || DEBUG
	uint32_t traptrace_index = traptrace_start(type, kern_ip, mach_absolute_time(), saved_state->rbp);
#endif

#if CONFIG_DTRACE
	/*
	 * Is there a DTrace hook?
	 */
	if (__improbable(tempDTraceTrapHook != NULL)) {
		if (tempDTraceTrapHook(type, state, lo_spp, 0) == KERN_SUCCESS) {
			/*
			 * If it succeeds, we are done...
			 */
			goto common_return;
		}
	}

	/* Handle traps originated from probe context. */
	if (thread != THREAD_NULL && thread->t_dtrace_inprobe) {
		if (dtrace_handle_trap(type, state)) {
			goto common_return;
		}
	}

#endif /* CONFIG_DTRACE */

	/*
	 * We come here with interrupts off as we don't want to recurse
	 * on preemption below, but we do want to re-enable interrupts
	 * as soon as we possibly can to hold latency down.
	 */
	if (__improbable(T_PREEMPT == type)) {
		ast_taken_kernel();

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
		    0, 0, 0, VM_KERNEL_UNSLIDE(kern_ip), 0);

		goto common_return;
	}

	user_addr_t kd_vaddr = is_user ? vaddr : VM_KERNEL_UNSLIDE(vaddr);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
	    (unsigned)(kd_vaddr >> 32), (unsigned)kd_vaddr, is_user,
	    VM_KERNEL_UNSLIDE(kern_ip), 0);


	if (T_PAGE_FAULT == type) {
		/*
		 * assume we're faulting in the kernel map
		 */
		map = kernel_map;

		if (__probable((thread != THREAD_NULL) && (thread->map != kernel_map) &&
		    (vaddr < VM_MAX_USER_PAGE_ADDRESS))) {
			/* fault occurred in userspace */
			map = thread->map;

			/* Intercept a potential Supervisor Mode Execute
			 * Protection fault.  These criteria identify
			 * both NX faults and SMEP faults, but both
			 * are fatal.  We avoid checking PTEs (racy).
			 * (The VM could just redrive a SMEP fault, hence
			 * the intercept).
			 */
			if (__improbable((code == (T_PF_PROT | T_PF_EXECUTE)) &&
			    (pmap_smep_enabled) && (saved_state->isf.rip == vaddr))) {
				goto debugger_entry;
			}

			/*
			 * Additionally check for SMAP faults...
			 * which are characterized by page-present and
			 * the AC bit unset (i.e. not from copyin/out path).
			 */
			if (__improbable(code & T_PF_PROT &&
			    pmap_smap_enabled &&
			    (saved_state->isf.rflags & EFL_AC) == 0)) {
				goto debugger_entry;
			}

			/*
			 * If we're not sharing cr3 with the user
			 * and we faulted in copyio,
			 * then switch cr3 here and dismiss the fault.
			 */
			if (no_shared_cr3 &&
			    (thread->machine.specFlags & CopyIOActive) &&
			    map->pmap->pm_cr3 != get_cr3_base()) {
				pmap_assert(current_cpu_datap()->cpu_pmap_pcid_enabled == FALSE);
				set_cr3_raw(map->pmap->pm_cr3);
				return;
			}
			if (__improbable(vaddr < PAGE_SIZE) &&
			    ((thread->machine.specFlags & CopyIOActive) == 0)) {
				goto debugger_entry;
			}
		}
	}

	(void) ml_set_interrupts_enabled(intr);

	switch (type) {
	case T_NO_FPU:
		fpnoextflt();
		goto common_return;

	case T_FPU_FAULT:
		fpextovrflt();
		goto common_return;

	case T_FLOATING_POINT_ERROR:
		fpexterrflt();
		goto common_return;

	case T_SSE_FLOAT_ERROR:
		fpSSEexterrflt();
		goto common_return;

	case T_INVALID_OPCODE:
		fpUDflt(kern_ip);
		goto debugger_entry;

	case T_DEBUG:
		if ((saved_state->isf.rflags & EFL_TF) == 0 && NO_WATCHPOINTS) {
			/* We've somehow encountered a debug
			 * register match that does not belong
			 * to the kernel debugger.
			 * This isn't supposed to happen.
			 */
			reset_dr7();
			goto common_return;
		}
		goto debugger_entry;
	case T_INT3:
		goto debugger_entry;
	case T_PAGE_FAULT:

#if CONFIG_DTRACE
		if (thread != THREAD_NULL && thread->t_dtrace_inprobe) { /* Executing under dtrace_probe? */
			if (dtrace_tally_fault(vaddr)) { /* Should a fault under dtrace be ignored? */
				/*
				 * DTrace has "anticipated" the possibility of this fault, and has
				 * established the suitable recovery state. Drop down now into the
				 * recovery handling code in "case T_GENERAL_PROTECTION:".
				 */
				goto FALL_THROUGH;
			}
		}
#endif /* CONFIG_DTRACE */

		prot = VM_PROT_READ;

		if (code & T_PF_WRITE) {
			prot |= VM_PROT_WRITE;
		}
		if (code & T_PF_EXECUTE) {
			prot |= VM_PROT_EXECUTE;
		}

		fault_result = result = vm_fault(map,
		    vaddr,
		    prot,
		    FALSE, VM_KERN_MEMORY_NONE,
		    THREAD_UNINT, NULL, 0);

		if (result == KERN_SUCCESS) {
			goto common_return;
		}
		/*
		 * fall through
		 */
#if CONFIG_DTRACE
FALL_THROUGH:
#endif /* CONFIG_DTRACE */

	case T_GENERAL_PROTECTION:
		/*
		 * If there is a failure recovery address
		 * for this fault, go there.
		 */
		for (rp = recover_table; rp < recover_table_end; rp++) {
			if (kern_ip == rp->fault_addr) {
				set_recovery_ip(saved_state, rp->recover_addr);
				goto common_return;
			}
		}

		/*
		 * Check thread recovery address also.
		 */
		if (thread != THREAD_NULL && thread->recover) {
			set_recovery_ip(saved_state, thread->recover);
			thread->recover = 0;
			goto common_return;
		}
		/*
		 * Unanticipated page-fault errors in kernel
		 * should not happen.
		 *
		 * fall through...
		 */
		OS_FALLTHROUGH;
	default:
		/*
		 * Exception 15 is reserved but some chips may generate it
		 * spuriously.  Seen at startup on AMD Athlon-64.
		 */
		if (type == 15) {
			kprintf("kernel_trap() ignoring spurious trap 15\n");
			goto common_return;
		}
debugger_entry:
		/* Ensure that the i386_kernel_state at the base of the
		 * current thread's stack (if any) is synchronized with the
		 * context at the moment of the trap, to facilitate
		 * access through the debugger.
		 */
		sync_iss_to_iks(state);
#if MACH_KDP
		if (kdp_i386_trap(type, saved_state, result, (vm_offset_t)vaddr)) {
			goto common_return;
		}
#endif
	}
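	/*
	 * Unhandled kernel trap: mask interrupts and panic with the saved
	 * register state.  panic_trap() does not return.
	 */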
	pal_cli();
	panic_trap(saved_state, trap_pl, fault_result);
	/*
	 * NO RETURN
	 */

common_return:
#if DEVELOPMENT || DEBUG
	if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
		traptrace_end(traptrace_index, mach_absolute_time());
	}
#endif
	return;
}

static void
set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip)
{
	saved_state->isf.rip = ip;
}

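/*
 * panic_trap: annotate the panic string with the trap name, the control
 * registers, and a best-effort classification of kernel NX, SMEP and
 * SMAP conditions, then hand off to panic().
 */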
static void
panic_trap(x86_saved_state64_t *regs, uint32_t pl, kern_return_t fault_result)
{
	const char *trapname = "Unknown";
	pal_cr_t cr0, cr2, cr3, cr4;
	boolean_t potential_smep_fault = FALSE, potential_kernel_NX_fault = FALSE;
	boolean_t potential_smap_fault = FALSE;

	pal_get_control_registers( &cr0, &cr2, &cr3, &cr4 );
	assert(ml_get_interrupts_enabled() == FALSE);
	current_cpu_datap()->cpu_fatal_trap_state = regs;
	/*
	 * Issue an I/O port read if one has been requested - this is an
	 * event logic analyzers can use as a trigger point.
	 */
	panic_notify();

	kprintf("CPU %d panic trap number 0x%x, rip 0x%016llx\n",
	    cpu_number(), regs->isf.trapno, regs->isf.rip);
	kprintf("cr0 0x%016llx cr2 0x%016llx cr3 0x%016llx cr4 0x%016llx\n",
	    cr0, cr2, cr3, cr4);

	if (regs->isf.trapno < TRAP_TYPES) {
		trapname = trap_type[regs->isf.trapno];
	}

	if ((regs->isf.trapno == T_PAGE_FAULT) && (regs->isf.err == (T_PF_PROT | T_PF_EXECUTE)) && (regs->isf.rip == regs->cr2)) {
		if (pmap_smep_enabled && (regs->isf.rip < VM_MAX_USER_PAGE_ADDRESS)) {
			potential_smep_fault = TRUE;
		} else if (regs->isf.rip >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
			potential_kernel_NX_fault = TRUE;
		}
	} else if (pmap_smap_enabled &&
	    regs->isf.trapno == T_PAGE_FAULT &&
	    regs->isf.err & T_PF_PROT &&
	    regs->cr2 < VM_MAX_USER_PAGE_ADDRESS &&
	    regs->isf.rip >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		potential_smap_fault = TRUE;
	}

#undef panic
	panic("Kernel trap at 0x%016llx, type %d=%s, registers:\n"
	    "CR0: 0x%016llx, CR2: 0x%016llx, CR3: 0x%016llx, CR4: 0x%016llx\n"
	    "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n"
	    "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n"
	    "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
	    "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
	    "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
	    "Fault CR2: 0x%016llx, Error code: 0x%016llx, Fault CPU: 0x%x%s%s%s%s, PL: %d, VF: %d\n",
	    regs->isf.rip, regs->isf.trapno, trapname,
	    cr0, cr2, cr3, cr4,
	    regs->rax, regs->rbx, regs->rcx, regs->rdx,
	    regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi,
	    regs->r8, regs->r9, regs->r10, regs->r11,
	    regs->r12, regs->r13, regs->r14, regs->r15,
	    regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF,
	    regs->isf.ss & 0xFFFF, regs->cr2, regs->isf.err, regs->isf.cpu,
	    virtualized ? " VMM" : "",
	    potential_kernel_NX_fault ? " Kernel NX fault" : "",
	    potential_smep_fault ? " SMEP/User NX fault" : "",
	    potential_smap_fault ? " SMAP fault" : "",
	    pl,
	    fault_result);
}

#if CONFIG_DTRACE
extern kern_return_t dtrace_user_probe(x86_saved_state_t *);
#endif

#if DEBUG
uint32_t fsigs[2];
uint32_t fsigns, fsigcs;
#endif
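/*
 * fsigs/fsigns/fsigcs record FP/SIMD register-state hashes sampled on
 * either side of user page-fault handling (see T_PAGE_FAULT below),
 * used to detect corruption of a thread's FP state across the fault.
 */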

/*
 * Trap from user mode.
 */
void
user_trap(
	x86_saved_state_t *saved_state)
{
	int exc;
	int err;
	mach_exception_code_t code;
	mach_exception_subcode_t subcode;
	int type;
	user_addr_t vaddr;
	vm_prot_t prot;
	thread_t thread = current_thread();
	kern_return_t kret;
	user_addr_t rip;
	unsigned long dr6 = 0; /* 32 bit for i386, 64 bit for x86_64 */
	int current_cpu = cpu_number();
#if DEVELOPMENT || DEBUG
	bool inspect_cacheline = false;
	uint32_t traptrace_index;
#endif
	assert((is_saved_state32(saved_state) && !thread_is_64bit_addr(thread)) ||
	    (is_saved_state64(saved_state) && thread_is_64bit_addr(thread)));

	if (is_saved_state64(saved_state)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(saved_state);

		/* Record cpu where state was captured */
		regs->isf.cpu = current_cpu;

		type = regs->isf.trapno;
		err = (int)regs->isf.err & 0xffff;
		vaddr = (user_addr_t)regs->cr2;
		rip = (user_addr_t)regs->isf.rip;
#if DEVELOPMENT || DEBUG
		traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->rbp);
#endif
	} else {
		x86_saved_state32_t *regs;

		regs = saved_state32(saved_state);

		/* Record cpu where state was captured */
		regs->cpu = current_cpu;

		type = regs->trapno;
		err = regs->err & 0xffff;
		vaddr = (user_addr_t)regs->cr2;
		rip = (user_addr_t)regs->eip;
#if DEVELOPMENT || DEBUG
		traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->ebp);
#endif
	}

#if DEVELOPMENT || DEBUG
	/*
	 * Copy the cacheline of code into the thread's instruction stream save area
	 * before enabling interrupts (the assumption is that we have not otherwise faulted or
	 * trapped since the original cache line stores).  If the saved code is not valid,
	 * we'll catch it below when we process the copyin() for unhandled faults.
	 */
	if (type == T_PAGE_FAULT || type == T_INVALID_OPCODE || type == T_GENERAL_PROTECTION) {
#define CACHELINE_SIZE 64
		THREAD_TO_PCB(thread)->insn_cacheline[CACHELINE_SIZE] = (uint8_t)(rip & (CACHELINE_SIZE - 1));
		bcopy(&cpu_shadowp(current_cpu)->cpu_rtimes[0],
		    &THREAD_TO_PCB(thread)->insn_cacheline[0],
		    sizeof(THREAD_TO_PCB(thread)->insn_cacheline) - 1);
		inspect_cacheline = true;
	}
#endif

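	/*
	 * For T_DEBUG, sample and clear DR6 before interrupts are enabled:
	 * once interrupts are on, the thread could be preempted and migrate,
	 * and another CPU's DR6 would tell us nothing about this trap.
	 */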
	if (type == T_DEBUG) {
		if (thread->machine.ids) {
			unsigned long clear = 0;
			/* Stash and clear this processor's DR6 value, in the event
			 * this was a debug register match
			 */
			__asm__ volatile ("mov %%db6, %0" : "=r" (dr6));
			__asm__ volatile ("mov %0, %%db6" : : "r" (clear));
		}
		/* [Re]Enable LBRs *BEFORE* enabling interrupts to ensure we hit the right CPU */
		i386_lbr_enable();
	}

	pal_sti();

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE,
	    (unsigned)(vaddr >> 32), (unsigned)vaddr,
	    (unsigned)(rip >> 32), (unsigned)rip, 0);

	code = 0;
	subcode = 0;
	exc = 0;

#if CONFIG_DTRACE
	/*
	 * DTrace does not consume all user traps, only INT_3's for now.
	 * Avoid needlessly calling tempDTraceTrapHook here, and let the
	 * INT_3 case handle them.
	 */
#endif

	DEBUG_KPRINT_SYSCALL_MASK(1,
	    "user_trap: type=0x%x(%s) err=0x%x cr2=%p rip=%p\n",
	    type, trap_type[type], err, (void *)(long) vaddr, (void *)(long) rip);

	switch (type) {
	case T_DIVIDE_ERROR:
		exc = EXC_ARITHMETIC;
		code = EXC_I386_DIV;
		break;

	case T_DEBUG:
	{
		pcb_t pcb;
		/*
		 * Update the PCB with this processor's DR6 value
		 * in the event this was a debug register match.
		 */
		pcb = THREAD_TO_PCB(thread);
		if (pcb->ids) {
			/*
			 * We can get and set the status register
			 * in 32-bit mode even on a 64-bit thread
			 * because the high order bits are not
			 * used on x86_64.
			 */
			if (thread_is_64bit_addr(thread)) {
				x86_debug_state64_t *ids = pcb->ids;
				ids->dr6 = dr6;
			} else { /* 32 bit thread */
				x86_debug_state32_t *ids = pcb->ids;
				ids->dr6 = (uint32_t) dr6;
			}
		}
		exc = EXC_BREAKPOINT;
		code = EXC_I386_SGL;
		break;
	}
	case T_INT3:
#if CONFIG_DTRACE
		if (dtrace_user_probe(saved_state) == KERN_SUCCESS) {
			return; /* If it succeeds, we are done... */
		}
#endif
		exc = EXC_BREAKPOINT;
		code = EXC_I386_BPT;
		break;

	case T_OVERFLOW:
		exc = EXC_ARITHMETIC;
		code = EXC_I386_INTO;
		break;

	case T_OUT_OF_BOUNDS:
		exc = EXC_SOFTWARE;
		code = EXC_I386_BOUND;
		break;

	case T_INVALID_OPCODE:
		if (fpUDflt(rip) == 1) {
			exc = EXC_BAD_INSTRUCTION;
			code = EXC_I386_INVOP;
		}
		break;

	case T_NO_FPU:
		fpnoextflt();
		break;

	case T_FPU_FAULT:
		fpextovrflt();
		/*
		 * Raise exception.
		 */
		exc = EXC_BAD_ACCESS;
		code = VM_PROT_READ | VM_PROT_EXECUTE;
		subcode = 0;
		break;

	case T_INVALID_TSS: /* invalid TSS == iret with NT flag set */
		exc = EXC_BAD_INSTRUCTION;
		code = EXC_I386_INVTSSFLT;
		subcode = err;
		break;

	case T_SEGMENT_NOT_PRESENT:
		exc = EXC_BAD_INSTRUCTION;
		code = EXC_I386_SEGNPFLT;
		subcode = err;
		break;

	case T_STACK_FAULT:
		exc = EXC_BAD_INSTRUCTION;
		code = EXC_I386_STKFLT;
		subcode = err;
		break;

	case T_GENERAL_PROTECTION:
		/*
		 * There's a wide range of circumstances which generate this
		 * class of exception.  From user-space, many involve bad
		 * addresses (such as a non-canonical 64-bit address).
		 * So we map this to EXC_BAD_ACCESS (and thereby SIGSEGV).
		 * The trouble is cr2 doesn't contain the faulting address;
		 * we'd need to decode the faulting instruction to really
		 * determine this.  We'll leave that to debuggers.
		 * However, attempted execution of privileged instructions
		 * (e.g. cli) also generates GP faults, so we map these to
		 * EXC_BAD_ACCESS (and thence SIGSEGV) also - rather than
		 * EXC_BAD_INSTRUCTION which is more accurate.  We just can't
		 * win!
		 */
		exc = EXC_BAD_ACCESS;
		code = EXC_I386_GPFLT;
		subcode = err;
		break;

	case T_PAGE_FAULT:
	{
		prot = VM_PROT_READ;

		if (err & T_PF_WRITE) {
			prot |= VM_PROT_WRITE;
		}
		if (__improbable(err & T_PF_EXECUTE)) {
			prot |= VM_PROT_EXECUTE;
		}
#if DEVELOPMENT || DEBUG
		uint32_t fsig = 0;
		fsig = thread_fpsimd_hash(thread);
#if DEBUG
		fsigs[0] = fsig;
#endif
#endif
		kret = vm_fault(thread->map,
		    vaddr,
		    prot, FALSE, VM_KERN_MEMORY_NONE,
		    THREAD_ABORTSAFE, NULL, 0);
#if DEVELOPMENT || DEBUG
		if (fsig) {
			uint32_t fsig2 = thread_fpsimd_hash(thread);
#if DEBUG
			fsigcs++;
			fsigs[1] = fsig2;
#endif
			if (fsig != fsig2) {
				panic("FP/SIMD state hash mismatch across fault thread: %p 0x%x->0x%x", thread, fsig, fsig2);
			}
		} else {
#if DEBUG
			fsigns++;
#endif
		}
#endif
		if (__probable((kret == KERN_SUCCESS) || (kret == KERN_ABORTED))) {
			break;
		} else if (__improbable(kret == KERN_FAILURE)) {
			/*
			 * For a user trap, vm_fault() should never return KERN_FAILURE.
			 * If it does, we're leaking preemption disables somewhere in the kernel.
			 */
			panic("vm_fault() KERN_FAILURE from user fault on thread %p", thread);
		}

		/* PAL debug hook (empty on x86) */
		pal_dbg_page_fault(thread, vaddr, kret);
		exc = EXC_BAD_ACCESS;
		code = kret;
		subcode = vaddr;
	}
	break;

	case T_SSE_FLOAT_ERROR:
		fpSSEexterrflt();
		exc = EXC_ARITHMETIC;
		code = EXC_I386_SSEEXTERR;
		subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_MXCSR;
		break;


	case T_FLOATING_POINT_ERROR:
		fpexterrflt();
		exc = EXC_ARITHMETIC;
		code = EXC_I386_EXTERR;
		subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_status;
		break;

	case T_DTRACE_RET:
#if CONFIG_DTRACE
		if (dtrace_user_probe(saved_state) == KERN_SUCCESS) {
			return; /* If it succeeds, we are done... */
		}
#endif
		/*
		 * If we get an INT 0x7f when we do not expect to,
		 * treat it as an illegal instruction
		 */
		exc = EXC_BAD_INSTRUCTION;
		code = EXC_I386_INVOP;
		break;

	default:
		panic("Unexpected user trap, type %d", type);
	}

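	/*
	 * A nonzero `exc' means the trap was not fully handled above and
	 * must be surfaced as a Mach exception (and possibly a BSD signal);
	 * otherwise we simply return to user mode.
	 */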
	if (exc != 0) {
		uint16_t cs;
		boolean_t intrs;

		if (is_saved_state64(saved_state)) {
			cs = saved_state64(saved_state)->isf.cs;
		} else {
			cs = saved_state32(saved_state)->cs;
		}

		if (last_branch_support_enabled) {
			intrs = ml_set_interrupts_enabled(FALSE);
			/*
			 * This is a bit racy (it's possible for this thread to migrate to another CPU, then
			 * migrate back, but that seems rather rare in practice), but good enough to ensure
			 * the LBRs are saved before proceeding with exception/signal dispatch.
			 */
			if (current_cpu == cpu_number()) {
				i386_lbr_synch(thread);
			}
			ml_set_interrupts_enabled(intrs);
		}

		/*
		 * Do not try to copyin from the instruction stream if the page fault was due
		 * to an access to rip and was unhandled.
		 * Do not deal with cases when %cs != USER[64]_CS
		 * And of course there's no need to copy the instruction stream if the boot-arg
		 * was set to 0.
		 */
		if (insn_copyin_count > 0 &&
		    (cs == USER64_CS || cs == USER_CS) && (type != T_PAGE_FAULT || vaddr != rip)) {
#if DEVELOPMENT || DEBUG
			copy_instruction_stream(thread, rip, type, inspect_cacheline);
#else
			copy_instruction_stream(thread, rip, type);
#endif
		}

#if DEVELOPMENT || DEBUG
		if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
			traptrace_end(traptrace_index, mach_absolute_time());
		}
#endif
		/*
		 * Note: Codepaths that directly return from user_trap() have pending
		 * ASTs processed in locore
		 */
		i386_exception(exc, code, subcode);
		/* NOTREACHED */
	} else {
#if DEVELOPMENT || DEBUG
		if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
			traptrace_end(traptrace_index, mach_absolute_time());
		}
#endif
	}
}

/*
 * Copyin up to x86_INSTRUCTION_STATE_MAX_INSN_BYTES bytes from the page that includes `rip`,
 * ensuring that we stay on the same page, clipping the start or end, as needed.
 * Add the clipped amount back at the start or end, depending on where it fits.
 * Consult the variable populated by the boot-arg `insn_capcnt'.
 */
static __attribute__((noinline)) void
copy_instruction_stream(thread_t thread, uint64_t rip, int __unused trap_code
#if DEVELOPMENT || DEBUG
    , bool inspect_cacheline
#endif
    )
{
#if x86_INSTRUCTION_STATE_MAX_INSN_BYTES > 4096
#error x86_INSTRUCTION_STATE_MAX_INSN_BYTES cannot exceed a page in size.
#endif
	pcb_t pcb = THREAD_TO_PCB(thread);
	vm_map_offset_t pagemask = ~vm_map_page_mask(current_map());
	vm_map_offset_t rip_page = rip & pagemask;
	vm_map_offset_t start_addr;
	vm_map_offset_t insn_offset;
	vm_map_offset_t end_addr = rip + (insn_copyin_count / 2);
	void *stack_buffer;
	int copyin_err = 0;
#if defined(MACH_BSD) && (DEVELOPMENT || DEBUG)
	void *procname;
#endif

#if DEVELOPMENT || DEBUG
	assert(insn_copyin_count <= x86_INSTRUCTION_STATE_MAX_INSN_BYTES);
#else
	if (insn_copyin_count > x86_INSTRUCTION_STATE_MAX_INSN_BYTES ||
	    insn_copyin_count < 64 /* CACHELINE_SIZE */) {
		return;
	}
#endif

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Walloca"
	stack_buffer = __builtin_alloca(insn_copyin_count);
#pragma clang diagnostic pop

	if (rip >= (insn_copyin_count / 2)) {
		start_addr = rip - (insn_copyin_count / 2);
	} else {
		start_addr = 0;
	}

	if (start_addr < rip_page) {
		insn_offset = (insn_copyin_count / 2) - (rip_page - start_addr);
		end_addr += (rip_page - start_addr);
		start_addr = rip_page;
	} else if (end_addr >= (rip_page + (~pagemask + 1))) {
		start_addr -= (end_addr - (rip_page + (~pagemask + 1))); /* Adjust start address backward */
		/* Adjust instruction offset due to start address change */
		insn_offset = (insn_copyin_count / 2) + (end_addr - (rip_page + (~pagemask + 1)));
		end_addr = rip_page + (~pagemask + 1); /* clip to the start of the next page (non-inclusive) */
	} else {
		insn_offset = insn_copyin_count / 2;
	}

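	/*
	 * At this point [start_addr, end_addr) is a window of at most
	 * insn_copyin_count bytes lying entirely within the page that
	 * contains `rip', and insn_offset is rip's position within it.
	 */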
1305 | disable_preemption(); /* Prevent copyin from faulting in the instruction stream */ | |
1306 | if ( | |
1307 | #if DEVELOPMENT || DEBUG | |
1308 | (insnstream_force_cacheline_mismatch < 2) && | |
1309 | #endif | |
1310 | ((end_addr > start_addr) && (copyin_err = copyin(start_addr, stack_buffer, end_addr - start_addr)) == 0)) { | |
1311 | enable_preemption(); | |
1312 | ||
1313 | if (pcb->insn_state == 0) { | |
1314 | pcb->insn_state = kalloc(sizeof(x86_instruction_state_t)); | |
1315 | } | |
1316 | ||
1317 | if (pcb->insn_state != 0) { | |
1318 | bcopy(stack_buffer, pcb->insn_state->insn_bytes, end_addr - start_addr); | |
1319 | bzero(&pcb->insn_state->insn_bytes[end_addr - start_addr], | |
1320 | insn_copyin_count - (end_addr - start_addr)); | |
1321 | ||
1322 | pcb->insn_state->insn_stream_valid_bytes = (int)(end_addr - start_addr); | |
1323 | pcb->insn_state->insn_offset = (int)insn_offset; | |
1324 | ||
1325 | #if DEVELOPMENT || DEBUG | |
1326 | /* Now try to validate the cacheline we read at early-fault time matches the code | |
1327 | * copied in. Before we do that, we have to make sure the buffer contains a valid | |
1328 | * cacheline by looking for the 2 sentinel values written in the event the cacheline | |
1329 | * could not be copied. | |
1330 | */ | |
1331 | #define CACHELINE_DATA_NOT_PRESENT 0xdeadc0debeefcafeULL | |
1332 | #define CACHELINE_MASK (CACHELINE_SIZE - 1) | |
1333 | ||
1334 | if (inspect_cacheline && | |
1335 | (*(uint64_t *)(uintptr_t)&pcb->insn_cacheline[0] != CACHELINE_DATA_NOT_PRESENT && | |
1336 | *(uint64_t *)(uintptr_t)&pcb->insn_cacheline[8] != CACHELINE_DATA_NOT_PRESENT)) { | |
1337 | /* | |
1338 | * The position of the cacheline in the instruction buffer is at offset | |
1339 | * insn_offset - (rip & CACHELINE_MASK) | |
1340 | */ | |
1341 | if (__improbable((rip & CACHELINE_MASK) > insn_offset)) { | |
1342 | printf("thread %p code cacheline @ %p clipped wrt copied-in code (offset %d)\n", | |
1343 | thread, (void *)(rip & ~CACHELINE_MASK), (int)(rip & CACHELINE_MASK)); | |
1344 | } else if (bcmp(&pcb->insn_state->insn_bytes[insn_offset - (rip & CACHELINE_MASK)], | |
1345 | &pcb->insn_cacheline[0], CACHELINE_SIZE) != 0 | |
1346 | || insnstream_force_cacheline_mismatch | |
1347 | ) { | |
1348 | #if x86_INSTRUCTION_STATE_CACHELINE_SIZE != CACHELINE_SIZE | |
1349 | #error cacheline size mismatch | |
1350 | #endif | |
1351 | bcopy(&pcb->insn_cacheline[0], &pcb->insn_state->insn_cacheline[0], | |
1352 | x86_INSTRUCTION_STATE_CACHELINE_SIZE); | |
1353 | /* Mark the instruction stream as being out-of-synch */ | |
1354 | pcb->insn_state->out_of_synch = 1; | |
1355 | ||
1356 | printf("thread %p code cacheline @ %p mismatches with copied-in code [trap 0x%x]\n", | |
1357 | thread, (void *)(rip & ~CACHELINE_MASK), trap_code); | |
1358 | for (int i = 0; i < 8; i++) { | |
1359 | printf("\t[%d] cl=0x%08llx vs. ci=0x%08llx\n", i, *(uint64_t *)(uintptr_t)&pcb->insn_cacheline[i * 8], | |
1360 | *(uint64_t *)(uintptr_t)&pcb->insn_state->insn_bytes[(i * 8) + insn_offset - (rip & CACHELINE_MASK)]); | |
1361 | } | |
1362 | if (panic_on_cacheline_mismatch) { | |
1363 | panic("Cacheline mismatch while processing unhandled exception."); | |
1364 | } | |
1365 | } else { | |
1366 | printf("thread %p code cacheline @ %p DOES match with copied-in code\n", | |
1367 | thread, (void *)(rip & ~CACHELINE_MASK)); | |
1368 | pcb->insn_state->out_of_synch = 0; | |
1369 | } | |
1370 | } else if (inspect_cacheline) { | |
1371 | printf("thread %p could not capture code cacheline at fault IP %p [offset %d]\n", | |
1372 | (void *)thread, (void *)rip, (int)(insn_offset - (rip & CACHELINE_MASK))); | |
1373 | pcb->insn_state->out_of_synch = 0; | |
1374 | } | |
1375 | #else | |
1376 | pcb->insn_state->out_of_synch = 0; | |
1377 | #endif /* DEVELOPMENT || DEBUG */ | |
1378 | ||
1379 | #if defined(MACH_BSD) && (DEVELOPMENT || DEBUG) | |
1380 | if (panic_on_trap_procname[0] != 0) { | |
1381 | char procnamebuf[65] = {0}; | |
1382 | ||
1383 | if (thread->task->bsd_info != NULL) { | |
1384 | procname = proc_name_address(thread->task->bsd_info); | |
1385 | strlcpy(procnamebuf, procname, sizeof(procnamebuf)); | |
1386 | ||
1387 | if (strcasecmp(panic_on_trap_procname, procnamebuf) == 0 && | |
1388 | ((1U << trap_code) & panic_on_trap_mask) != 0) { | |
1389 | panic("Panic requested on trap type 0x%x for process `%s'", trap_code, | |
1390 | panic_on_trap_procname); | |
1391 | /*NORETURN*/ | |
1392 | } | |
1393 | } | |
1394 | } | |
1395 | #endif /* MACH_BSD && (DEVELOPMENT || DEBUG) */ | |
1396 | } | |
1397 | } else { | |
1398 | enable_preemption(); | |
1399 | ||
1400 | pcb->insn_state_copyin_failure_errorcode = copyin_err; | |
1401 | #if DEVELOPMENT || DEBUG | |
1402 | if (inspect_cacheline && pcb->insn_state == 0) { | |
1403 | pcb->insn_state = kalloc(sizeof(x86_instruction_state_t)); | |
1404 | } | |
1405 | if (pcb->insn_state != 0) { | |
1406 | pcb->insn_state->insn_stream_valid_bytes = 0; | |
1407 | pcb->insn_state->insn_offset = 0; | |
1408 | ||
1409 | if (inspect_cacheline && | |
1410 | (*(uint64_t *)(uintptr_t)&pcb->insn_cacheline[0] != CACHELINE_DATA_NOT_PRESENT && | |
1411 | *(uint64_t *)(uintptr_t)&pcb->insn_cacheline[8] != CACHELINE_DATA_NOT_PRESENT)) { | |
1412 | /* | |
1413 | * We can still copy the cacheline into the instruction state structure | |
1414 | * if it contains valid data | |
1415 | */ | |
1416 | pcb->insn_state->out_of_synch = 1; | |
1417 | bcopy(&pcb->insn_cacheline[0], &pcb->insn_state->insn_cacheline[0], | |
1418 | x86_INSTRUCTION_STATE_CACHELINE_SIZE); | |
1419 | } | |
1420 | } | |
1421 | #endif /* DEVELOPMENT || DEBUG */ | |
94ff46dc | 1422 | } |
1c79356b A |
1423 | } |
1424 | ||
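/*
 * Illustrative sketch (not XNU code): a minimal userland model of the
 * sentinel check and cacheline-to-stream alignment performed above. The
 * names cacheline, insn_bytes, rip and insn_offset are hypothetical
 * stand-ins for the pcb fields; the sizes mirror the defines above.
 */
#include <stdint.h>
#include <string.h>

#define CL_SIZE             64
#define CL_MASK             (CL_SIZE - 1)
#define CL_DATA_NOT_PRESENT 0xdeadc0debeefcafeULL

/* True only if neither of the two leading sentinel words survived,
 * i.e. the early-fault path really overwrote them with copied code. */
static int
cacheline_was_captured(const uint8_t cacheline[CL_SIZE])
{
	uint64_t w0, w1;

	memcpy(&w0, &cacheline[0], sizeof(w0));
	memcpy(&w1, &cacheline[8], sizeof(w1));
	return w0 != CL_DATA_NOT_PRESENT && w1 != CL_DATA_NOT_PRESENT;
}

/* The cacheline holding rip starts at (rip & ~CL_MASK); since rip maps to
 * insn_offset within the copied stream, the cacheline begins at
 * insn_offset - (rip & CL_MASK) there. Returns -1 when the cacheline is
 * clipped, 1 on a byte-for-byte match, 0 on a mismatch. */
static int
cacheline_matches_stream(const uint8_t *insn_bytes, size_t insn_offset,
    uint64_t rip, const uint8_t cacheline[CL_SIZE])
{
	if ((rip & CL_MASK) > insn_offset) {
		return -1;
	}
	return memcmp(&insn_bytes[insn_offset - (rip & CL_MASK)],
	           cacheline, CL_SIZE) == 0;
}

int
main(void)
{
	uint8_t cl[CL_SIZE] = { 0x90 }, stream[256] = { 0 };

	stream[32] = 0x90;	/* pretend rip's cacheline overlaps here */
	return (cacheline_was_captured(cl) &&
	       cacheline_matches_stream(stream, 32, 0x1000, cl) == 1) ? 0 : 1;
}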
1c79356b A |
1425 | /* |
1426 | * Handle exceptions for i386. | |
1427 | * | |
1428 | * If we are an AT bus machine, we must turn off the AST for a | |
1429 | * delayed floating-point exception. | |
1430 | * | |
1431 | * If we are providing floating-point emulation, we may have | |
1432 | * to retrieve the real register values from the floating point | |
1433 | * emulator. | |
1434 | */ | |
1435 | void | |
1436 | i386_exception( | |
0a7de745 | 1437 | int exc, |
2d21ac55 A |
1438 | mach_exception_code_t code, |
1439 | mach_exception_subcode_t subcode) | |
1c79356b | 1440 | { |
2d21ac55 | 1441 | mach_exception_data_type_t codes[EXCEPTION_CODE_MAX]; |
1c79356b | 1442 | |
b0d623f7 | 1443 | DEBUG_KPRINT_SYSCALL_MACH("i386_exception: exc=%d code=0x%llx subcode=0x%llx\n", |
0a7de745 A |
1444 | exc, code, subcode); |
1445 | codes[0] = code; /* new exception interface */ | |
1c79356b | 1446 | codes[1] = subcode; |
91447636 | 1447 | exception_triage(exc, codes, 2); |
1c79356b A |
1448 | /*NOTREACHED*/ |
1449 | } | |
1450 | ||
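/*
 * Illustrative sketch (not XNU code): the "new exception interface" used
 * above is simply a two-entry code array handed to exception_triage().
 * This standalone mirror shows the packing; pack_exception_codes() is a
 * hypothetical helper, and the example values in main() are illustrative
 * only (KERN_INVALID_ADDRESS is 1 in mach/kern_return.h).
 */
#include <stdint.h>

typedef int64_t mach_exception_data_type_t;
#define EXCEPTION_CODE_MAX 2

static void
pack_exception_codes(mach_exception_data_type_t codes[EXCEPTION_CODE_MAX],
    mach_exception_data_type_t code, mach_exception_data_type_t subcode)
{
	codes[0] = code;    /* what went wrong, e.g. KERN_INVALID_ADDRESS */
	codes[1] = subcode; /* detail, e.g. the faulting virtual address */
}

int
main(void)
{
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX];

	pack_exception_codes(codes, 1 /* KERN_INVALID_ADDRESS */, 0x1000);
	return (int)(codes[0] + codes[1] != 0x1001);
}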
0c530ab8 | 1451 | |
fe8ab488 A |
1452 | /* Synchronize a thread's x86_kernel_state (if any) with the given |
1453 | * x86_saved_state_t obtained from the trap/IPI handler; called in | |
0c530ab8 | 1454 | * kernel_trap() prior to entering the debugger, and when receiving |
fe8ab488 A |
1455 | * an "MP_KDP" IPI. Called with null saved_state if an incoming IPI |
1456 | * was detected from the kernel while spinning with interrupts masked. | |
0c530ab8 | 1457 | */ |
0a7de745 | 1458 | |
0c530ab8 | 1459 | void |
b0d623f7 | 1460 | sync_iss_to_iks(x86_saved_state_t *saved_state) |
0c530ab8 | 1461 | { |
5ba3f43e | 1462 | struct x86_kernel_state *iks = NULL; |
0c530ab8 A |
1463 | vm_offset_t kstack; |
1464 | boolean_t record_active_regs = FALSE; | |
1465 | ||
6d2010ae | 1466 | /* The PAL may have a special way to sync registers */ |
0a7de745 | 1467 | if (saved_state && saved_state->flavor == THREAD_STATE_NONE) { |
6d2010ae | 1468 | pal_get_kern_regs( saved_state ); |
0a7de745 | 1469 | } |
6d2010ae | 1470 | |
0a7de745 | 1471 | if (current_thread() != NULL && |
5ba3f43e | 1472 | (kstack = current_thread()->kernel_stack) != 0) { |
0a7de745 | 1473 | x86_saved_state64_t *regs = saved_state64(saved_state); |
0c530ab8 A |
1474 | |
1475 | iks = STACK_IKS(kstack); | |
1476 | ||
6d2010ae | 1477 | /* Did we take the trap/interrupt in kernel mode? */ |
fe8ab488 | 1478 | if (saved_state == NULL || /* NULL => polling in kernel */ |
0a7de745 A |
1479 | regs == USER_REGS64(current_thread())) { |
1480 | record_active_regs = TRUE; | |
1481 | } else { | |
b0d623f7 A |
1482 | iks->k_rbx = regs->rbx; |
1483 | iks->k_rsp = regs->isf.rsp; | |
1484 | iks->k_rbp = regs->rbp; | |
1485 | iks->k_r12 = regs->r12; | |
1486 | iks->k_r13 = regs->r13; | |
1487 | iks->k_r14 = regs->r14; | |
1488 | iks->k_r15 = regs->r15; | |
1489 | iks->k_rip = regs->isf.rip; | |
1490 | } | |
0c530ab8 A |
1491 | } |
1492 | ||
1493 | if (record_active_regs == TRUE) { | |
b0d623f7 | 1494 | /* Show the trap handler path */ |
0a7de745 A |
1495 | __asm__ volatile ("movq %%rbx, %0" : "=m" (iks->k_rbx)); |
1496 | __asm__ volatile ("movq %%rsp, %0" : "=m" (iks->k_rsp)); | |
1497 | __asm__ volatile ("movq %%rbp, %0" : "=m" (iks->k_rbp)); | |
1498 | __asm__ volatile ("movq %%r12, %0" : "=m" (iks->k_r12)); | |
1499 | __asm__ volatile ("movq %%r13, %0" : "=m" (iks->k_r13)); | |
1500 | __asm__ volatile ("movq %%r14, %0" : "=m" (iks->k_r14)); | |
1501 | __asm__ volatile ("movq %%r15, %0" : "=m" (iks->k_r15)); | |
b0d623f7 | 1502 | /* "Current" instruction pointer */ |
0a7de745 A |
1503 | __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" |
1504 | : "=m" (iks->k_rip) | |
1505 | : | |
1506 | : "rax"); | |
0c530ab8 A |
1507 | } |
1508 | } | |
1509 | ||
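/*
 * Illustrative sketch (not XNU code): the same inline-asm pattern used by
 * the record_active_regs path above, demonstrated as a standalone x86-64
 * userland program (GCC/Clang asm syntax). snapshot_t is a hypothetical
 * stand-in for struct x86_kernel_state.
 */
#include <stdint.h>
#include <stdio.h>

typedef struct {
	uint64_t rbx, rsp, rbp, r12, r13, r14, r15, rip;
} snapshot_t;

static void
capture_callee_saved(snapshot_t *s)
{
	__asm__ volatile ("movq %%rbx, %0" : "=m" (s->rbx));
	__asm__ volatile ("movq %%rsp, %0" : "=m" (s->rsp));
	__asm__ volatile ("movq %%rbp, %0" : "=m" (s->rbp));
	__asm__ volatile ("movq %%r12, %0" : "=m" (s->r12));
	__asm__ volatile ("movq %%r13, %0" : "=m" (s->r13));
	__asm__ volatile ("movq %%r14, %0" : "=m" (s->r14));
	__asm__ volatile ("movq %%r15, %0" : "=m" (s->r15));
	/*
	 * leaq 1f(%rip) computes the address of the local label "1:" that
	 * immediately follows, yielding a "current" instruction pointer.
	 */
	__asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
	    : "=m" (s->rip) : : "rax");
}

int
main(void)
{
	snapshot_t s;

	capture_callee_saved(&s);
	printf("rip=0x%llx rsp=0x%llx rbp=0x%llx\n",
	    (unsigned long long)s.rip, (unsigned long long)s.rsp,
	    (unsigned long long)s.rbp);
	return 0;
}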
1510 | /* | |
1511 | * This is used by the NMI interrupt handler (from mp.c) to | |
1512 | * unconditionally sync the trap handler context to the IKS | |
1513 | * irrespective of whether the NMI was fielded in kernel | |
1514 | * or user space. | |
1515 | */ | |
1516 | void | |
0a7de745 A |
1517 | sync_iss_to_iks_unconditionally(__unused x86_saved_state_t *saved_state) |
1518 | { | |
b0d623f7 | 1519 | struct x86_kernel_state *iks; |
0c530ab8 | 1520 | vm_offset_t kstack; |
0c530ab8 A |
1521 | |
1522 | if ((kstack = current_thread()->kernel_stack) != 0) { | |
0c530ab8 | 1523 | iks = STACK_IKS(kstack); |
b0d623f7 | 1524 | /* Display the trap handler path */ |
0a7de745 A |
1525 | __asm__ volatile ("movq %%rbx, %0" : "=m" (iks->k_rbx)); |
1526 | __asm__ volatile ("movq %%rsp, %0" : "=m" (iks->k_rsp)); | |
1527 | __asm__ volatile ("movq %%rbp, %0" : "=m" (iks->k_rbp)); | |
1528 | __asm__ volatile ("movq %%r12, %0" : "=m" (iks->k_r12)); | |
1529 | __asm__ volatile ("movq %%r13, %0" : "=m" (iks->k_r13)); | |
1530 | __asm__ volatile ("movq %%r14, %0" : "=m" (iks->k_r14)); | |
1531 | __asm__ volatile ("movq %%r15, %0" : "=m" (iks->k_r15)); | |
b0d623f7 | 1532 | /* "Current" instruction pointer */ |
0a7de745 | 1533 | __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" : "=m" (iks->k_rip)::"rax"); |
0c530ab8 A |
1534 | } |
1535 | } | |
00867663 A |
1536 | |
1537 | #if DEBUG | |
5c9f4661 A |
1538 | #define TERI 1 |
1539 | #endif | |
1540 | ||
1541 | #if TERI | |
0a7de745 | 1542 | extern void thread_exception_return_internal(void) __dead2; |
00867663 | 1543 | |
0a7de745 A |
1544 | void |
1545 | thread_exception_return(void) | |
1546 | { | |
00867663 A |
1547 | thread_t thread = current_thread(); |
1548 | ml_set_interrupts_enabled(FALSE); | |
d9a64523 | 1549 | if (thread_is_64bit_addr(thread) != task_has_64Bit_addr(thread->task)) { |
0a7de745 | 1550 | panic("Task/thread bitness mismatch %p %p, task: %d, thread: %d", thread, thread->task, thread_is_64bit_addr(thread), task_has_64Bit_addr(thread->task)); |
00867663 A |
1551 | } |
1552 | ||
d9a64523 | 1553 | if (thread_is_64bit_addr(thread)) { |
00867663 A |
1554 | if ((gdt_desc_p(USER64_CS)->access & ACC_PL_U) == 0) { |
1555 | panic("64-GDT mismatch %p, descriptor: %p", thread, gdt_desc_p(USER64_CS)); | |
1556 | } | |
1557 | } else { | |
0a7de745 A |
1558 | if ((gdt_desc_p(USER_CS)->access & ACC_PL_U) == 0) { |
1559 | panic("32-GDT mismatch %p, descriptor: %p", thread, gdt_desc_p(USER_CS)); | |
00867663 A |
1560 | } |
1561 | } | |
0a7de745 | 1562 | assert(get_preemption_level() == 0); |
00867663 A |
1563 | thread_exception_return_internal(); |
1564 | } | |
1565 | #endif |
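/*
 * Illustrative sketch (not XNU code): what the ACC_PL_U tests above
 * verify. In an x86 segment descriptor, bits 5-6 of the access byte hold
 * the DPL, and a user code segment must carry DPL 3. The constant values
 * below are believed to mirror osfmk/i386/seg.h but are reproduced here
 * as assumptions, purely for illustration.
 */
#include <stdint.h>

#define ACC_P    0x80 /* descriptor present */
#define ACC_PL_U 0x60 /* DPL 3: user accessible */

/* thread_exception_return() panics when this is false for the code
 * segment it is about to return to user mode through. */
static int
descriptor_is_user_accessible(uint8_t access)
{
	return (access & ACC_PL_U) != 0;
}

int
main(void)
{
	/* present, DPL 3, execute/read code segment (type bits assumed) */
	uint8_t user_code_access = ACC_P | ACC_PL_U | 0x1A;

	return descriptor_is_user_accessible(user_code_access) ? 0 : 1;
}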