/*
 * Copyright (c) 2005-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/thread.h>
#include <mach/thread_status.h>

typedef x86_saved_state_t savearea_t;

#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <libkern/OSAtomic.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <miscfs/devfs/devfs.h>
#include <mach/vm_param.h>
#include <machine/pal_routines.h>
#include <machine/trap.h>
/*
 * APPLE NOTE: The regmap is used to decode which 64bit uregs[] register
 * is being accessed when passed the 32bit uregs[] constant (based on
 * the reg.d translator file). dtrace_getreg() is smart enough to handle
 * the register mappings. The register set definitions are the same as
 * those used by the fasttrap_getreg code.
 */
#include "fasttrap_regset.h"
static const uint8_t regmap[19] = {
	REG_GS,		/* GS */
	REG_FS,		/* FS */
	REG_ES,		/* ES */
	REG_DS,		/* DS */
	REG_RDI,	/* EDI */
	REG_RSI,	/* ESI */
	REG_RBP,	/* EBP, REG_FP */
	REG_RSP,	/* ESP */
	REG_RBX,	/* EBX */
	REG_RDX,	/* EDX, REG_R1 */
	REG_RCX,	/* ECX */
	REG_RAX,	/* EAX, REG_R0 */
	REG_TRAPNO,	/* TRAPNO */
	REG_ERR,	/* ERR */
	REG_RIP,	/* EIP, REG_PC */
	REG_CS,		/* CS */
	REG_RFL,	/* EFL, REG_PS */
	REG_RSP,	/* UESP, REG_SP */
	REG_SS		/* SS */
};
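/*
 * Example of the intended translation (a sketch using the reg.d /
 * fasttrap_regset.h constants): a 32-bit consumer asking for uregs[EBP]
 * is routed through regmap[EBP], which yields REG_RBP, so dtrace_getreg()
 * reads the value from the 64-bit save area as regs->ss_64.rbp.
 */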
extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fltoffs, int fault, uint64_t illval)
{
	/*
	 * For the case of the error probe firing let's
	 * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG.
	 */
	state->dts_arg_error_illval = illval;
	dtrace_probe(dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault);
}
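/*
 * Why the stash is needed (an inference from the call above): dtrace_probe()
 * carries only five payload arguments after the probe id, so "illval" cannot
 * be passed along as a sixth; the DIF argument-variable code is expected to
 * recover it from dts_arg_error_illval when the ERROR probe's arguments are
 * evaluated.
 */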
/*
 * Atomicity and synchronization
 */
void
dtrace_membar_producer(void)
{
	/* Order prior stores before subsequent stores. */
	__asm__ volatile("sfence");
}

void
dtrace_membar_consumer(void)
{
	/* Order prior loads before subsequent loads. */
	__asm__ volatile("lfence");
}
/*
 * Interrupt manipulation
 * XXX dtrace_getipl() can be called from probe context.
 */
int
dtrace_getipl(void)
{
	/*
	 * XXX Drat, get_interrupt_level is MACH_KERNEL_PRIVATE
	 * in osfmk/kern/cpu_data.h
	 */
	/* return get_interrupt_level(); */
	return (ml_at_interrupt_context() ? 1 : 0);
}
typedef struct xcArg {
	processorid_t cpu;
	dtrace_xcall_t f;
	void *arg;
} xcArg_t;

static void
xcRemote( void *foo )
{
	xcArg_t *pArg = (xcArg_t *)foo;

	if ( pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL ) {
		(pArg->f)(pArg->arg);
	}
}
/*
 * dtrace_xcall() is not called from probe context.
 */
void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
{
	xcArg_t xcArg;

	xcArg.cpu = cpu;
	xcArg.f = f;
	xcArg.arg = arg;

	if (cpu == DTRACE_CPUALL) {
		mp_cpus_call (CPUMASK_ALL, ASYNC, xcRemote, (void*)&xcArg);
	}
	else {
		mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), ASYNC, xcRemote, (void*)&xcArg);
	}
}
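/*
 * Minimal usage sketch (illustrative only; xc_barrier_func() and
 * dtrace_sync_barrier() are hypothetical names, not part of this file):
 * a routine is broadcast to every CPU by passing DTRACE_CPUALL as the
 * target processor id.
 */
#if 0
static void
xc_barrier_func(void *ignored)
{
	(void)ignored;
	dtrace_membar_consumer();
}

static void
dtrace_sync_barrier(void)
{
	/* Not called from probe context. */
	dtrace_xcall(DTRACE_CPUALL, xc_barrier_func, NULL);
}
#endif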
uint64_t
dtrace_getreg(struct regs *savearea, uint_t reg)
{
	boolean_t is64Bit = proc_is64bit(current_proc());
	x86_saved_state_t *regs = (x86_saved_state_t *)savearea;

	if (regs == NULL) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	if (is64Bit) {
		if (reg <= SS) {
			reg = regmap[reg];
		} else {
			reg -= (SS + 1);
		}

		switch (reg) {
		case REG_RDI:
			return (uint64_t)(regs->ss_64.rdi);
		case REG_RSI:
			return (uint64_t)(regs->ss_64.rsi);
		case REG_RDX:
			return (uint64_t)(regs->ss_64.rdx);
		case REG_RCX:
			return (uint64_t)(regs->ss_64.rcx);
		case REG_R8:
			return (uint64_t)(regs->ss_64.r8);
		case REG_R9:
			return (uint64_t)(regs->ss_64.r9);
		case REG_RAX:
			return (uint64_t)(regs->ss_64.rax);
		case REG_RBX:
			return (uint64_t)(regs->ss_64.rbx);
		case REG_RBP:
			return (uint64_t)(regs->ss_64.rbp);
		case REG_R10:
			return (uint64_t)(regs->ss_64.r10);
		case REG_R11:
			return (uint64_t)(regs->ss_64.r11);
		case REG_R12:
			return (uint64_t)(regs->ss_64.r12);
		case REG_R13:
			return (uint64_t)(regs->ss_64.r13);
		case REG_R14:
			return (uint64_t)(regs->ss_64.r14);
		case REG_R15:
			return (uint64_t)(regs->ss_64.r15);
		case REG_FS:
			return (uint64_t)(regs->ss_64.fs);
		case REG_GS:
			return (uint64_t)(regs->ss_64.gs);
		case REG_TRAPNO:
			return (uint64_t)(regs->ss_64.isf.trapno);
		case REG_ERR:
			return (uint64_t)(regs->ss_64.isf.err);
		case REG_RIP:
			return (uint64_t)(regs->ss_64.isf.rip);
		case REG_CS:
			return (uint64_t)(regs->ss_64.isf.cs);
		case REG_SS:
			return (uint64_t)(regs->ss_64.isf.ss);
		case REG_RFL:
			return (uint64_t)(regs->ss_64.isf.rflags);
		case REG_RSP:
			return (uint64_t)(regs->ss_64.isf.rsp);
		default:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
	} else { /* is 32bit user */
		/* beyond register SS */
		if (reg > x86_SAVED_STATE32_COUNT - 1) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
		return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg];
	}
}
uint64_t
dtrace_getvmreg(uint_t ndx)
{
	uint64_t reg = 0;
	boolean_t failed = FALSE;

	/* Any change in the vmread final opcode must be reflected in dtrace_handle_trap below. */
	__asm__ __volatile__(
		"vmread %2, %0\n\t"
		"ja 1f\n\t"
		"mov $1, %1\n\t"
		"1:\n\t"
		: "=a" (reg), "+r" (failed) : "D" ((uint64_t)ndx));
	/*
	 * Check for fault in vmreg first. If DTrace has recovered the fault caused by
	 * vmread above then the value in failed will be unreliable.
	 */
	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ILLOP)) {
		return 0;
	}

	/* If vmread was executed but failed because CF or ZF is 1, report the failure. */
	if (failed) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = ndx;
		return 0;
	}

	return reg;
}
#define RETURN_OFFSET 4
#define RETURN_OFFSET64 8
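/*
 * The saved return address sits one pointer-width above the saved frame
 * pointer in a standard x86 frame: fp + 4 in a 32-bit frame and fp + 8 in
 * a 64-bit frame. The stack walkers below rely on this layout.
 */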
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
    user_addr_t sp)
{
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0
	uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl */
	size_t s1, s2;
#endif
	int ret = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pcstack == NULL || pcstack_limit > 0);

#if 0 /* XXX signal stack crawl */
	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	while (pc != 0) {
		ret++;
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		} else
#endif
		{
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

	return (ret);
}
/*
 * The return value indicates if we've modified the stack.
 */
static int
dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc,
    user_addr_t sp)
{
	volatile uint16_t *flags = (volatile uint16_t *) &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	int64_t missing_tos;
	int rc = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pc != NULL);

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * If we found ourselves in an entry probe, the frame pointer has not
		 * yet been pushed (that happens in the
		 * function prologue). The best approach is to
		 * add the current pc as a missing top of stack,
		 * and back the pc up to the caller, which is stored at the
		 * current stack pointer address since the call
		 * instruction puts it there right before
		 * the branch.
		 */
		missing_tos = *pc;

		if (is64Bit)
			*pc = dtrace_fuword64(sp);
		else
			*pc = dtrace_fuword32(sp);

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
		}
	} else {
		/*
		 * We might have a top of stack override, in which case we just
		 * add that frame without question to the top. This
		 * happens in return probes where you have a valid
		 * frame pointer, but it's for the caller's frame
		 * and you'd like to add the pc of the return site
		 * to the frame.
		 */
		missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos;
	}

	if (missing_tos != 0) {
		if (pcstack != NULL && pcstack_limit != NULL) {
			/*
			 * If the missing top of stack has been filled out, then
			 * we add it and adjust the size.
			 */
			*(*pcstack)++ = missing_tos;
			(*pcstack_limit)--;
		}
		/*
		 * return 1 because we would have changed the
		 * stack whether or not it was passed in. This
		 * ensures the stack count is correct.
		 */
		rc = 1;
	}

	return rc;
}
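/*
 * Illustrative layout for the entry-probe case above (a sketch, not real
 * addresses): at the first instruction of a function the new frame has not
 * been pushed yet, so the stack pointer still points at the caller's return
 * address and the frame pointer still describes the caller's frame:
 *
 *     sp -> [ return address in caller ]   <- becomes the adjusted pc
 *           [ caller's locals ... ]
 *     fp -> [ caller's saved frame ptr ]
 *
 * The interrupted pc itself is reported as the missing top of stack.
 */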
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	int n;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	/*
	 * The return value indicates if we've modified the stack.
	 * Since there is nothing else to fix up in either case,
	 * we can safely ignore it here.
	 */
	(void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp);

	if (pcstack_limit <= 0)
		return;

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */
	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
int
dtrace_getustackdepth(void)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	int n = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (thread == NULL)
		return 0;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		return 0;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) {
		/*
		 * we would have adjusted the stack if we had
		 * supplied one (that is what rc == 1 means).
		 * Also, as a side effect, the pc might have
		 * been fixed up, which is good for calling
		 * in to dtrace_getustack_common.
		 */
		n++;
	}

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */
	n += dtrace_getustack_common(NULL, 0, pc, fp);

	return (n);
}
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	savearea_t *regs;
	user_addr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0
	uintptr_t oldcontext;
	size_t s1, s2;
#endif
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	regs = (savearea_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = regs->ss_32.eip;
	sp = regs->ss_32.ebp;

#if 0 /* XXX signal stack crawl */
	oldcontext = lwp->lwp_oldcontext;

	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	if (dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) {
		/*
		 * we made a change.
		 */
		*fpstack++ = 0;
		if (pcstack_limit <= 0)
			return;
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		} else
#endif
		{
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	struct frame *nextfp, *minfp, *stacktop;
	int depth = 0;
	int last = 0;
	uintptr_t pc;
	uintptr_t caller = CPU->cpu_dtrace_caller;
	int on_intr;

	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
	else
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);

	minfp = fp;

	if (intrpc != NULL && depth < pcstack_limit)
		pcstack[depth++] = (pc_t)intrpc;

	while (depth < pcstack_limit) {
		nextfp = *(struct frame **)fp;
		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64);

		if (nextfp <= minfp || nextfp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + kernel_stack_size);

				on_intr = 0;
				continue;
			}
			/*
			 * This is the last frame we can process; indicate
			 * that we should return after processing this frame.
			 */
			last = 1;
		}

		if (aframes > 0) {
			if (--aframes == 0 && caller != 0) {
				/*
				 * We've just run out of artificial frames,
				 * and we have a valid caller -- fill it in
				 * now.
				 */
				ASSERT(depth < pcstack_limit);
				pcstack[depth++] = (pc_t)caller;
				caller = 0;
			}
		} else {
			if (depth < pcstack_limit)
				pcstack[depth++] = (pc_t)pc;
		}

		if (last) {
			while (depth < pcstack_limit)
				pcstack[depth++] = 0;
			return;
		}

		fp = nextfp;
		minfp = fp;
	}
}
struct frame {
	struct frame *backchain;
	uintptr_t retaddr;
};
uint64_t
dtrace_getarg(int arg, int aframes, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t val = 0;
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	uintptr_t *stack;
	uintptr_t pc;
	int i;

	/*
	 * A total of 6 arguments are passed via registers; any argument with
	 * index of 5 or lower is therefore in a register.
	 */
	int inreg = 5;

	for (i = 1; i <= aframes; i++) {
		fp = fp->backchain;
		pc = fp->retaddr;

		if (dtrace_invop_callsite_pre != NULL
		    && pc > (uintptr_t)dtrace_invop_callsite_pre
		    && pc <= (uintptr_t)dtrace_invop_callsite_post) {
			/*
			 * In the case of x86_64, we will use the pointer to the
			 * save area structure that was pushed when we took the
			 * trap. To get this structure, we must increment
			 * beyond the frame structure. If the
			 * argument that we're seeking is passed on the stack,
			 * we'll pull the true stack pointer out of the saved
			 * registers and decrement our argument by the number
			 * of arguments passed in registers; if the argument
			 * we're seeking is passed in registers, we can just
			 * load it directly.
			 */

			/* fp points to frame of dtrace_invop() activation. */
			fp = fp->backchain; /* to fbt_perfcallback() activation. */
			fp = fp->backchain; /* to kernel_trap() activation. */
			fp = fp->backchain; /* to trap_from_kernel() activation. */

			x86_saved_state_t *tagged_regs = (x86_saved_state_t *)&fp[1];
			x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

			if (arg <= inreg) {
				stack = (uintptr_t *)(void*)&saved_state->rdi;
			} else {
				fp = (struct frame *)(saved_state->isf.rsp);
				stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
				arg -= inreg + 1;
			}
			goto load;
		}
	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() -- We arrive here when the provider has
	 * called dtrace_probe() directly.
	 * The probe ID is the first argument to dtrace_probe().
	 * We must advance beyond that to get the argX.
	 */
	arg++; /* Advance past probeID */

	if (arg <= inreg) {
		/*
		 * This shouldn't happen. If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	arg -= (inreg + 1);
	stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */

load:
	if (dtrace_canload((uint64_t)(stack + arg), sizeof(uint64_t),
	    mstate, vstate)) {
		/* dtrace_probe arguments arg0 ... arg4 are 64bits wide */
		val = dtrace_load64((uint64_t)(stack + arg));
	}

	return (val);
}
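/*
 * Worked example of the indexing above (a sketch, assuming the SysV AMD64
 * convention): the first six integer arguments arrive in registers, and the
 * trap save area keeps those registers in consecutive slots starting at
 * saved_state->rdi, so for arg <= inreg the expression "stack + arg" indexes
 * directly into the register save area. A seventh argument (arg == 6) was
 * pushed by the caller instead: the true stack pointer is recovered from
 * saved_state->isf.rsp, the index is rebased by "arg -= inreg + 1", and
 * "stack + arg" then picks up the first marshalled on-stack argument.
 */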
void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
	/*
	 * "base" is the smallest toxic address in the range, "limit" is the first
	 * VALID address greater than "base".
	 */
	func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS);
	if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0)
		func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
}
extern boolean_t dtrace_handle_trap(int, x86_saved_state_t *);

boolean_t
dtrace_handle_trap(int trapno, x86_saved_state_t *state)
{
	x86_saved_state64_t *saved_state = saved_state64(state);

	if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) {
		return FALSE;
	}

	/*
	 * A general-purpose solution would require pulling in a disassembler. Right now
	 * there is only one specific case to be handled, so it is hardcoded here.
	 */
	if (trapno == T_INVALID_OPCODE) {
		uint8_t *inst = (uint8_t *)saved_state->isf.rip;

		/* vmread %rdi, %rax */
		if (inst[0] == 0x0f && inst[1] == 0x78 && inst[2] == 0xf8) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			saved_state->isf.rip += 3;
			return TRUE;
		}
	}

	return FALSE;
}