/*
 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
#include <kern/thread.h>
#include <mach/thread_status.h>

typedef x86_saved_state_t savearea_t;
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <libkern/OSAtomic.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <miscfs/devfs/devfs.h>
#include <mach/vm_param.h>
/*
 * APPLE NOTE: The regmap is used to decode which 64bit uregs[] register
 * is being accessed when passed the 32bit uregs[] constant (based on
 * the reg.d translator file). The dtrace_getreg() is smart enough to handle
 * the register mappings. The register set definitions are the same as
 * those used by the fasttrap_getreg code.
 */
#include "fasttrap_regset.h"
static const uint8_t regmap[19] = {
    REG_GS,      /* GS */
    REG_FS,      /* FS */
    REG_ES,      /* ES */
    REG_DS,      /* DS */
    REG_RDI,     /* EDI */
    REG_RSI,     /* ESI */
    REG_RBP,     /* EBP, REG_FP */
    REG_RSP,     /* ESP */
    REG_RBX,     /* EBX */
    REG_RDX,     /* EDX, REG_R1 */
    REG_RCX,     /* ECX */
    REG_RAX,     /* EAX, REG_R0 */
    REG_TRAPNO,  /* TRAPNO */
    REG_ERR,     /* ERR */
    REG_RIP,     /* EIP, REG_PC */
    REG_CS,      /* CS */
    REG_RFL,     /* EFL, REG_PS */
    REG_RSP,     /* UESP, REG_SP */
    REG_SS       /* SS */
};
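/*
 * For example, when dtrace_getreg() is handed the 32-bit EIP constant for a
 * 64-bit process, regmap[] redirects the lookup to REG_RIP so the value is
 * read from the 64-bit save area.
 */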
extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fltoffs, int fault, uint64_t illval)
{
    /*
     * For the case of the error probe firing, let's stash away "illval"
     * here, and special-case retrieving it in DIF_VARIABLE_ARG.
     */
    state->dts_arg_error_illval = illval;
    dtrace_probe(dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault);
}
/*
 * Atomicity and synchronization
 */
void
dtrace_membar_producer(void)
{
    __asm__ volatile("sfence");
}

void
dtrace_membar_consumer(void)
{
    __asm__ volatile("lfence");
}
/*
 * Interrupt manipulation
 * XXX dtrace_getipl() can be called from probe context.
 */
int
dtrace_getipl(void)
{
    /*
     * XXX Drat, get_interrupt_level is MACH_KERNEL_PRIVATE
     * in osfmk/kern/cpu_data.h
     */
    /* return get_interrupt_level(); */
    return (ml_at_interrupt_context() ? 1 : 0);
}
extern void mp_broadcast(
    void (*action_func)(void *),
    void *arg);
typedef struct xcArg {
    processorid_t cpu;
    dtrace_xcall_t f;
    void *arg;
} xcArg_t;
static void
xcRemote(void *foo)
{
    xcArg_t *pArg = (xcArg_t *)foo;

    if (pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL) {
        (pArg->f)(pArg->arg);
    }
}
/*
 * dtrace_xcall() is not called from probe context.
 */
void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
{
    xcArg_t xcArg;

    xcArg.cpu = cpu;
    xcArg.f = f;
    xcArg.arg = arg;

    mp_broadcast(xcRemote, (void *)&xcArg);
}
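/*
 * Passing DTRACE_CPUALL as the cpu argument makes xcRemote() invoke f(arg) on
 * every processor reached by mp_broadcast(); passing a specific processorid_t
 * restricts the call to the CPU whose cpu_id matches.
 */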
uint64_t
dtrace_getreg(struct regs *savearea, uint_t reg)
{
    boolean_t is64Bit = proc_is64bit(current_proc());
    x86_saved_state_t *regs = (x86_saved_state_t *)savearea;

    if (is64Bit) {
        /* Translate 32-bit uregs[] constants through regmap[]. */
        if (reg <= SS) {
            reg = regmap[reg];
        } else {
            reg -= (SS + 1);
        }

        switch (reg) {
        case REG_RDI:
            return (uint64_t)(regs->ss_64.rdi);
        case REG_RSI:
            return (uint64_t)(regs->ss_64.rsi);
        case REG_RDX:
            return (uint64_t)(regs->ss_64.rdx);
        case REG_RCX:
            return (uint64_t)(regs->ss_64.rcx);
        case REG_R8:
            return (uint64_t)(regs->ss_64.r8);
        case REG_R9:
            return (uint64_t)(regs->ss_64.r9);
        case REG_RAX:
            return (uint64_t)(regs->ss_64.rax);
        case REG_RBX:
            return (uint64_t)(regs->ss_64.rbx);
        case REG_RBP:
            return (uint64_t)(regs->ss_64.rbp);
        case REG_R10:
            return (uint64_t)(regs->ss_64.r10);
        case REG_R11:
            return (uint64_t)(regs->ss_64.r11);
        case REG_R12:
            return (uint64_t)(regs->ss_64.r12);
        case REG_R13:
            return (uint64_t)(regs->ss_64.r13);
        case REG_R14:
            return (uint64_t)(regs->ss_64.r14);
        case REG_R15:
            return (uint64_t)(regs->ss_64.r15);
        case REG_FS:
            return (uint64_t)(regs->ss_64.fs);
        case REG_GS:
            return (uint64_t)(regs->ss_64.gs);
        case REG_TRAPNO:
            return (uint64_t)(regs->ss_64.isf.trapno);
        case REG_ERR:
            return (uint64_t)(regs->ss_64.isf.err);
        case REG_RIP:
            return (uint64_t)(regs->ss_64.isf.rip);
        case REG_CS:
            return (uint64_t)(regs->ss_64.isf.cs);
        case REG_SS:
            return (uint64_t)(regs->ss_64.isf.ss);
        case REG_RFL:
            return (uint64_t)(regs->ss_64.isf.rflags);
        case REG_RSP:
            return (uint64_t)(regs->ss_64.isf.rsp);
        default:
            DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
            return (0);
        }
    } else { /* is 32bit user */
        /* beyond register SS */
        if (reg > x86_SAVED_STATE32_COUNT - 1) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
            return (0);
        }
        return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg];
    }
}
#define RETURN_OFFSET 4
#define RETURN_OFFSET64 8
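/*
 * A frame begins with the saved frame pointer; the return address pushed by
 * the call instruction sits immediately above it, i.e. at fp + 4 on i386 and
 * fp + 8 on x86_64. The user-stack walkers below read the next pc from
 * (sp + RETURN_OFFSET{,64}) and the next frame pointer from *sp.
 */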
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
    user_addr_t sp)
{
    volatile uint16_t *flags =
        (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0 /* XXX signal stack crawl */
    uintptr_t oldcontext = lwp->lwp_oldcontext;
    size_t s1, s2;
#endif
    int ret = 0;
    boolean_t is64Bit = proc_is64bit(current_proc());

    ASSERT(pcstack == NULL || pcstack_limit > 0);

#if 0 /* XXX signal stack crawl */
    if (p->p_model == DATAMODEL_NATIVE) {
        s1 = sizeof (struct frame) + 2 * sizeof (long);
        s2 = s1 + sizeof (siginfo_t);
    } else {
        s1 = sizeof (struct frame32) + 3 * sizeof (int);
        s2 = s1 + sizeof (siginfo32_t);
    }
#endif

    while (pc != 0) {
        ret++;
        if (pcstack != NULL) {
            *pcstack++ = (uint64_t)pc;
            pcstack_limit--;
            if (pcstack_limit <= 0)
                break;
        }

        if (sp == 0)
            break;

#if 0 /* XXX signal stack crawl */
        if (oldcontext == sp + s1 || oldcontext == sp + s2) {
            if (p->p_model == DATAMODEL_NATIVE) {
                ucontext_t *ucp = (ucontext_t *)oldcontext;
                greg_t *gregs = ucp->uc_mcontext.gregs;

                sp = dtrace_fulword(&gregs[REG_FP]);
                pc = dtrace_fulword(&gregs[REG_PC]);

                oldcontext = dtrace_fulword(&ucp->uc_link);
            } else {
                ucontext32_t *ucp = (ucontext32_t *)oldcontext;
                greg32_t *gregs = ucp->uc_mcontext.gregs;

                sp = dtrace_fuword32(&gregs[EBP]);
                pc = dtrace_fuword32(&gregs[EIP]);

                oldcontext = dtrace_fuword32(&ucp->uc_link);
            }
        } else
#endif
        {
            if (is64Bit) {
                pc = dtrace_fuword64((sp + RETURN_OFFSET64));
                sp = dtrace_fuword64(sp);
            } else {
                pc = dtrace_fuword32((sp + RETURN_OFFSET));
                sp = dtrace_fuword32(sp);
            }
        }

        /*
         * This is totally bogus: if we faulted, we're going to clear
         * the fault and break. This is to deal with the apparently
         * broken Java stacks on x86.
         */
        if (*flags & CPU_DTRACE_FAULT) {
            *flags &= ~CPU_DTRACE_FAULT;
            break;
        }
    }

    return (ret);
}
/*
 * The return value indicates if we've modified the stack.
 */
static int
dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc,
    user_addr_t sp)
{
    int64_t missing_tos;
    int rc = 0;
    boolean_t is64Bit = proc_is64bit(current_proc());

    ASSERT(pc != NULL);

    if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
        /*
         * If we found ourselves in an entry probe, the frame pointer has not
         * yet been pushed (that happens in the function prologue). The best
         * approach is to add the current pc as a missing top of stack, and
         * back the pc up to the caller, which is stored at the current stack
         * pointer address since the call instruction puts it there right
         * before the branch.
         */
        missing_tos = *pc;

        if (is64Bit)
            *pc = dtrace_fuword64(sp);
        else
            *pc = dtrace_fuword32(sp);
    } else {
        /*
         * We might have a top of stack override, in which case we just
         * add that frame without question to the top. This happens in
         * return probes where you have a valid frame pointer, but it's
         * for the caller's frame and you'd like to add the pc of the
         * return site to the frame.
         */
        missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos;
    }

    if (missing_tos != 0) {
        if (pcstack != NULL && pcstack_limit != NULL) {
            /*
             * If the missing top of stack has been filled out, then
             * we add it and adjust the size.
             */
            *(*pcstack)++ = missing_tos;
            (*pcstack_limit)--;
        }
        /*
         * return 1 because we would have changed the stack whether or
         * not it was passed in. This ensures the stack count is correct.
         */
        rc = 1;
    }

    return rc;
}
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
    thread_t thread = current_thread();
    x86_saved_state_t *regs;
    user_addr_t pc, sp, fp;
    volatile uint16_t *flags =
        (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
    int n;
    boolean_t is64Bit = proc_is64bit(current_proc());

    if (*flags & CPU_DTRACE_FAULT)
        return;

    if (pcstack_limit <= 0)
        return;

    /*
     * If there's no user context we still need to zero the stack.
     */
    if (thread == NULL)
        goto zero;

    regs = (x86_saved_state_t *)find_user_regs(thread);
    if (regs == NULL)
        goto zero;

    *pcstack++ = (uint64_t)proc_selfpid();
    pcstack_limit--;

    if (pcstack_limit <= 0)
        return;

    if (is64Bit) {
        pc = regs->ss_64.isf.rip;
        sp = regs->ss_64.isf.rsp;
        fp = regs->ss_64.rbp;
    } else {
        pc = regs->ss_32.eip;
        sp = regs->ss_32.uesp;
        fp = regs->ss_32.ebp;
    }

    /*
     * The return value indicates if we've modified the stack.
     * Since there is nothing else to fix up in either case,
     * we can safely ignore it here.
     */
    (void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp);

    if (pcstack_limit <= 0)
        return;

    /*
     * Note that unlike ppc, the x86 code does not use
     * CPU_DTRACE_USTACK_FP. This is because x86 always
     * traces from the fp, even in syscall/profile/fbt
     * providers.
     */
    n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
    ASSERT(n <= pcstack_limit);

    pcstack += n;
    pcstack_limit -= n;

zero:
    while (pcstack_limit-- > 0)
        *pcstack++ = 0;
}
int
dtrace_getustackdepth(void)
{
    thread_t thread = current_thread();
    x86_saved_state_t *regs;
    user_addr_t pc, sp, fp;
    int n = 0;
    boolean_t is64Bit = proc_is64bit(current_proc());

    if (thread == NULL)
        return 0;

    if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
        return (-1);

    regs = (x86_saved_state_t *)find_user_regs(thread);
    if (regs == NULL)
        return 0;

    if (is64Bit) {
        pc = regs->ss_64.isf.rip;
        sp = regs->ss_64.isf.rsp;
        fp = regs->ss_64.rbp;
    } else {
        pc = regs->ss_32.eip;
        sp = regs->ss_32.uesp;
        fp = regs->ss_32.ebp;
    }

    if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) {
        /*
         * we would have adjusted the stack if we had
         * supplied one (that is what rc == 1 means).
         * Also, as a side effect, the pc might have
         * been fixed up, which is good for calling
         * in to dtrace_getustack_common.
         */
        n++;
    }

    /*
     * Note that unlike ppc, the x86 code does not use
     * CPU_DTRACE_USTACK_FP. This is because x86 always
     * traces from the fp, even in syscall/profile/fbt
     * providers.
     */
    n += dtrace_getustack_common(NULL, 0, pc, fp);

    return (n);
}
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
    thread_t thread = current_thread();
    savearea_t *regs;
    user_addr_t pc, sp;
    volatile uint16_t *flags =
        (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0 /* XXX signal stack crawl */
    uintptr_t oldcontext;
    size_t s1, s2;
#endif
    boolean_t is64Bit = proc_is64bit(current_proc());

    if (*flags & CPU_DTRACE_FAULT)
        return;

    if (pcstack_limit <= 0)
        return;

    /*
     * If there's no user context we still need to zero the stack.
     */
    if (thread == NULL)
        goto zero;

    regs = (savearea_t *)find_user_regs(thread);
    if (regs == NULL)
        goto zero;

    *pcstack++ = (uint64_t)proc_selfpid();
    pcstack_limit--;

    if (pcstack_limit <= 0)
        return;

    pc = regs->ss_32.eip;
    sp = regs->ss_32.ebp;

#if 0 /* XXX signal stack crawl */
    oldcontext = lwp->lwp_oldcontext;

    if (p->p_model == DATAMODEL_NATIVE) {
        s1 = sizeof (struct frame) + 2 * sizeof (long);
        s2 = s1 + sizeof (siginfo_t);
    } else {
        s1 = sizeof (struct frame32) + 3 * sizeof (int);
        s2 = s1 + sizeof (siginfo32_t);
    }
#endif

    if (dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) {
        *fpstack++ = 0;
        if (pcstack_limit <= 0)
            return;
    }

    while (pc != 0) {
        *pcstack++ = (uint64_t)pc;
        *fpstack++ = sp;
        pcstack_limit--;
        if (pcstack_limit <= 0)
            break;

        if (sp == 0)
            break;

#if 0 /* XXX signal stack crawl */
        if (oldcontext == sp + s1 || oldcontext == sp + s2) {
            if (p->p_model == DATAMODEL_NATIVE) {
                ucontext_t *ucp = (ucontext_t *)oldcontext;
                greg_t *gregs = ucp->uc_mcontext.gregs;

                sp = dtrace_fulword(&gregs[REG_FP]);
                pc = dtrace_fulword(&gregs[REG_PC]);

                oldcontext = dtrace_fulword(&ucp->uc_link);
            } else {
                ucontext_t *ucp = (ucontext_t *)oldcontext;
                greg_t *gregs = ucp->uc_mcontext.gregs;

                sp = dtrace_fuword32(&gregs[EBP]);
                pc = dtrace_fuword32(&gregs[EIP]);

                oldcontext = dtrace_fuword32(&ucp->uc_link);
            }
        } else
#endif
        {
            if (is64Bit) {
                pc = dtrace_fuword64((sp + RETURN_OFFSET64));
                sp = dtrace_fuword64(sp);
            } else {
                pc = dtrace_fuword32((sp + RETURN_OFFSET));
                sp = dtrace_fuword32(sp);
            }
        }

        /*
         * This is totally bogus: if we faulted, we're going to clear
         * the fault and break. This is to deal with the apparently
         * broken Java stacks on x86.
         */
        if (*flags & CPU_DTRACE_FAULT) {
            *flags &= ~CPU_DTRACE_FAULT;
            break;
        }
    }

zero:
    while (pcstack_limit-- > 0)
        *pcstack++ = 0;
}
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
    struct frame *fp = (struct frame *)__builtin_frame_address(0);
    struct frame *nextfp, *minfp, *stacktop;
    int depth = 0;
    int last = 0;
    uintptr_t pc;
    uintptr_t caller = CPU->cpu_dtrace_caller;
    int on_intr;

    if ((on_intr = CPU_ON_INTR(CPU)) != 0)
        stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
    else
        stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);

    minfp = fp;

    aframes++;

    if (intrpc != NULL && depth < pcstack_limit)
        pcstack[depth++] = (pc_t)intrpc;

    while (depth < pcstack_limit) {
        nextfp = *(struct frame **)fp;
#if defined(__x86_64__)
        pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64);
#else
        pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET);
#endif

        if (nextfp <= minfp || nextfp >= stacktop) {
            if (on_intr) {
                /*
                 * Hop from interrupt stack to thread stack.
                 */
                vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

                minfp = (struct frame *)kstack_base;
                stacktop = (struct frame *)(kstack_base + kernel_stack_size);

                on_intr = 0;
                continue;
            }
            /*
             * This is the last frame we can process; indicate
             * that we should return after processing this frame.
             */
            last = 1;
        }

        if (aframes > 0) {
            if (--aframes == 0 && caller != 0) {
                /*
                 * We've just run out of artificial frames,
                 * and we have a valid caller -- fill it in
                 * now.
                 */
                ASSERT(depth < pcstack_limit);
                pcstack[depth++] = (pc_t)caller;
                caller = 0;
            }
        } else {
            if (depth < pcstack_limit)
                pcstack[depth++] = (pc_t)pc;
        }

        if (last) {
            while (depth < pcstack_limit)
                pcstack[depth++] = 0;
            return;
        }

        fp = nextfp;
        minfp = fp;
    }
}
struct frame {
    struct frame *backchain;
    uintptr_t retaddr;
};
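/*
 * This mirrors the two words at a frame pointer laid down by the usual
 * prologue: the saved caller frame pointer (backchain) with the return
 * address (retaddr) directly above it, which is what lets dtrace_getarg()
 * below hop from activation to activation via fp->backchain.
 */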
uint64_t
dtrace_getarg(int arg, int aframes)
{
    uint64_t val;
    struct frame *fp = (struct frame *)__builtin_frame_address(0);
    uintptr_t *stack;
    uintptr_t pc;
    int i;

#if defined(__x86_64__)
    /*
     * A total of 6 arguments are passed via registers; any argument with
     * index of 5 or lower is therefore in a register.
     */
    int inreg = 5;
#endif

    for (i = 1; i <= aframes; i++) {
        fp = fp->backchain;
        pc = fp->retaddr;

        if (pc == (uintptr_t)dtrace_invop_callsite) {
#if defined(__i386__)
            /*
             * If we pass through the invalid op handler, we will
             * use the pointer that it passed to the stack as the
             * second argument to dtrace_invop() as the pointer to
             * the frame we're hunting for.
             */
            stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
            fp = (struct frame *)stack[1]; /* Grab *second* argument */
            stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
#elif defined(__x86_64__)
            /*
             * In the case of x86_64, we will use the pointer to the
             * save area structure that was pushed when we took the
             * trap. To get this structure, we must increment
             * beyond the frame structure. If the
             * argument that we're seeking is passed on the stack,
             * we'll pull the true stack pointer out of the saved
             * registers and decrement our argument by the number
             * of arguments passed in registers; if the argument
             * we're seeking is passed in registers, we can just
             * load it directly.
             */

            /* fp points to frame of dtrace_invop() activation. */
            fp = fp->backchain; /* to fbt_perfcallback() activation. */
            fp = fp->backchain; /* to kernel_trap() activation. */
            fp = fp->backchain; /* to trap_from_kernel() activation. */

            x86_saved_state_t *tagged_regs = (x86_saved_state_t *)&fp[1];
            x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

            if (arg <= inreg) {
                stack = (uintptr_t *)&saved_state->rdi;
            } else {
                stack = (uintptr_t *)(saved_state->isf.rsp);
                arg -= inreg;
            }
#endif
            goto load;
        }
    }

    /*
     * Arrive here when provider has called dtrace_probe directly.
     */
    arg++; /* Advance past probeID */

#if defined(__x86_64__)
    if (arg <= inreg) {
        /*
         * This shouldn't happen. If the argument is passed in a
         * register then it should have been, well, passed in a
         * register...
         */
        DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
        return (0);
    }

    arg -= (inreg + 1);
#endif
    stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */

load:
    DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
    val = *(((uint64_t *)stack) + arg); /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
    DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

    return (val);
}
void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
    /*
     * "base" is the smallest toxic address in the range, "limit" is the first
     * VALID address greater than "base".
     */
    func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS);
    if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0)
        func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
}