4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
28 #if !defined(__APPLE__)
29 #include <sys/dtrace.h>
30 #include <sys/systrace.h>
32 #include <sys/systm.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #define SYSTRACE_ARTIFICIAL_FRAMES 1
42 #define _KERNEL /* Solaris vs. Darwin */
46 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47 #include <kern/thread.h>
48 #include <mach/thread_status.h>
49 /* XXX All of these should really be derived from syscall_sw.h */
50 #if defined(__i386__) || defined (__x86_64__)
/*
 * Layout of the system call number word as used in this file: the top
 * 8 bits are masked off as the call "class" and the remaining low 24
 * bits are the call number.
 *
 * The class mask is built from an unsigned constant: 0xFF << 24 does not
 * fit in a signed int, and left-shifting a signed value out of range is
 * undefined behavior in C.  The resulting mask values are unchanged.
 */
#define SYSCALL_CLASS_SHIFT 24
#define SYSCALL_CLASS_MASK (0xFFu << SYSCALL_CLASS_SHIFT)
#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
/* Mask applied to eax when the caller's saved state is 32-bit. */
#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
56 typedef x86_saved_state_t savearea_t
;
59 #include <sys/param.h>
60 #include <sys/systm.h>
62 #include <sys/errno.h>
63 #include <sys/ioctl.h>
65 #include <sys/fcntl.h>
66 #include <miscfs/devfs/devfs.h>
68 #include <sys/dtrace.h>
69 #include <sys/dtrace_impl.h>
72 #include <sys/systm.h>
76 #include <machine/pal_routines.h>
78 #if defined(__i386__) || defined (__x86_64__)
79 #define SYSTRACE_ARTIFICIAL_FRAMES 2
80 #define MACHTRACE_ARTIFICIAL_FRAMES 3
82 #error Unknown Architecture
85 #include <sys/sysent.h>
86 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
87 #define NSYSCALL nsysent /* and is less than 500 or so */
89 extern const char *syscallnames
[];
91 #include <sys/dtrace_glue.h>
92 #define casptr dtrace_casptr
93 #define membar_enter dtrace_membar_producer
95 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
96 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
98 extern lck_attr_t
* dtrace_lck_attr
;
99 extern lck_grp_t
* dtrace_lck_grp
;
100 static lck_mtx_t dtrace_systrace_lock
; /* probe state lock */
102 systrace_sysent_t
*systrace_sysent
= NULL
;
103 void (*systrace_probe
)(dtrace_id_t
, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
105 static uint64_t systrace_getarg(void *, dtrace_id_t
, void *, int, int);
108 systrace_stub(dtrace_id_t id
, uint64_t arg0
, uint64_t arg1
,
109 uint64_t arg2
, uint64_t arg3
, uint64_t arg4
)
111 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
115 dtrace_systrace_syscall(struct proc
*pp
, void *uap
, int *rv
)
120 systrace_sysent_t
*sy
;
126 syscall_arg_t
*ip
= (syscall_arg_t
*)uap
;
128 #if defined(__i386__) || defined (__x86_64__)
129 #pragma unused(flavor)
131 pal_register_cache_state(current_thread(), VALID
);
132 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
134 if (is_saved_state64(tagged_regs
)) {
135 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
136 code
= regs
->rax
& SYSCALL_NUMBER_MASK
;
138 * Check for indirect system call... system call number
145 code
= saved_state32(tagged_regs
)->eax
& I386_SYSCALL_NUMBER_MASK
;
148 vm_offset_t params
= (vm_offset_t
) (saved_state32(tagged_regs
)->uesp
+ sizeof (int));
149 code
= fuword(params
);
154 #error Unknown Architecture
157 // Bounds "check" the value of code a la unix_syscall
158 sy
= (code
>= NUM_SYSENT
) ? &systrace_sysent
[63] : &systrace_sysent
[code
];
160 if ((id
= sy
->stsy_entry
) != DTRACE_IDNONE
) {
161 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
163 uthread
->t_dtrace_syscall_args
= (void *)ip
;
166 (*systrace_probe
)(id
, *ip
, *(ip
+1), *(ip
+2), *(ip
+3), *(ip
+4));
168 (*systrace_probe
)(id
, 0, 0, 0, 0, 0);
171 uthread
->t_dtrace_syscall_args
= (void *)0;
176 * We want to explicitly allow DTrace consumers to stop a process
177 * before it actually executes the meat of the syscall.
179 p
= ttoproc(curthread
);
180 mutex_enter(&p
->p_lock
);
181 if (curthread
->t_dtrace_stop
&& !curthread
->t_lwp
->lwp_nostop
) {
182 curthread
->t_dtrace_stop
= 0;
183 stop(PR_REQUESTED
, 0);
185 mutex_exit(&p
->p_lock
);
188 rval
= (*sy
->stsy_underlying
)(pp
, uap
, rv
);
190 if ((id
= sy
->stsy_return
) != DTRACE_IDNONE
) {
191 uint64_t munged_rv0
, munged_rv1
;
192 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
195 uthread
->t_dtrace_errno
= rval
; /* Establish t_dtrace_errno now in case this enabling refers to it. */
198 * "Decode" rv for use in the call to dtrace_probe()
200 if (rval
== ERESTART
) {
201 munged_rv0
= -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
203 } else if (rval
!= EJUSTRETURN
) {
205 munged_rv0
= -1LL; /* Mimic what libc will do. */
208 switch (sy
->stsy_return_type
) {
209 case _SYSCALL_RET_INT_T
:
213 case _SYSCALL_RET_UINT_T
:
214 munged_rv0
= ((u_int
)rv
[0]);
215 munged_rv1
= ((u_int
)rv
[1]);
217 case _SYSCALL_RET_OFF_T
:
218 case _SYSCALL_RET_UINT64_T
:
219 munged_rv0
= *(u_int64_t
*)rv
;
222 case _SYSCALL_RET_ADDR_T
:
223 case _SYSCALL_RET_SIZE_T
:
224 case _SYSCALL_RET_SSIZE_T
:
225 munged_rv0
= *(user_addr_t
*)rv
;
228 case _SYSCALL_RET_NONE
:
244 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
246 * "This is a bit of an historical artifact. At first, the syscall provider just
247 * had its return value in arg0, and the fbt and pid providers had their return
248 * values in arg1 (so that we could use arg0 for the offset of the return site).
250 * We inevitably started writing scripts where we wanted to see the return
251 * values from probes in all three providers, and we made this script easier
252 * to write by replicating the syscall return values in arg1 to match fbt and
253 * pid. We debated briefly about removing the return value from arg0, but
254 * decided that it would be less confusing to have the same data in two places
255 * than to have some non-helpful, non-intuitive value in arg0.
257 * This change was made 4/23/2003 according to the DTrace project's putback log."
259 (*systrace_probe
)(id
, munged_rv0
, munged_rv0
, munged_rv1
, (uint64_t)rval
, 0);
266 dtrace_systrace_syscall_return(unsigned short code
, int rval
, int *rv
)
268 systrace_sysent_t
*sy
;
271 // Bounds "check" the value of code a la unix_syscall_return
272 sy
= (code
>= NUM_SYSENT
) ? &systrace_sysent
[63] : &systrace_sysent
[code
];
274 if ((id
= sy
->stsy_return
) != DTRACE_IDNONE
) {
275 uint64_t munged_rv0
, munged_rv1
;
276 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
279 uthread
->t_dtrace_errno
= rval
; /* Establish t_dtrace_errno now in case this enabling refers to it. */
282 * "Decode" rv for use in the call to dtrace_probe()
284 if (rval
== ERESTART
) {
285 munged_rv0
= -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
287 } else if (rval
!= EJUSTRETURN
) {
289 munged_rv0
= -1LL; /* Mimic what libc will do. */
292 switch (sy
->stsy_return_type
) {
293 case _SYSCALL_RET_INT_T
:
297 case _SYSCALL_RET_UINT_T
:
298 munged_rv0
= ((u_int
)rv
[0]);
299 munged_rv1
= ((u_int
)rv
[1]);
301 case _SYSCALL_RET_OFF_T
:
302 case _SYSCALL_RET_UINT64_T
:
303 munged_rv0
= *(u_int64_t
*)rv
;
306 case _SYSCALL_RET_ADDR_T
:
307 case _SYSCALL_RET_SIZE_T
:
308 case _SYSCALL_RET_SSIZE_T
:
309 munged_rv0
= *(user_addr_t
*)rv
;
312 case _SYSCALL_RET_NONE
:
327 (*systrace_probe
)(id
, munged_rv0
, munged_rv0
, munged_rv1
, (uint64_t)rval
, 0);
330 #endif /* __APPLE__ */
/*
 * Encoding of the per-probe private argument (parg) handed to
 * dtrace_probe_create(): the low SYSTRACE_SHIFT bits hold the table
 * index of the call, and bit SYSTRACE_SHIFT is set for "entry" probes
 * and clear for "return" probes.
 *
 * SYSTRACE_ENTRY()/SYSTRACE_RETURN() build the value;
 * SYSTRACE_ISENTRY()/SYSTRACE_SYSNUM() take it apart again in the
 * enable/disable/destroy callbacks.
 */
332 #define SYSTRACE_SHIFT 16
333 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
334 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
335 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
336 #define SYSTRACE_RETURN(id) (id)
338 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
339 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
342 static dev_info_t
*systrace_devi
;
343 static dtrace_provider_id_t systrace_id
;
345 #if !defined (__APPLE__)
347 systrace_init(struct sysent
*actual
, systrace_sysent_t
**interposed
)
349 systrace_sysent_t
*sysent
= *interposed
;
352 if (sysent
== NULL
) {
353 *interposed
= sysent
= kmem_zalloc(sizeof (systrace_sysent_t
) *
357 for (i
= 0; i
< NSYSCALL
; i
++) {
358 struct sysent
*a
= &actual
[i
];
359 systrace_sysent_t
*s
= &sysent
[i
];
361 if (LOADABLE_SYSCALL(a
) && !LOADED_SYSCALL(a
))
364 if (a
->sy_callc
== dtrace_systrace_syscall
)
367 #ifdef _SYSCALL32_IMPL
368 if (a
->sy_callc
== dtrace_systrace_syscall32
)
372 s
->stsy_underlying
= a
->sy_callc
;
376 #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
378 systrace_init(struct sysent
*actual
, systrace_sysent_t
**interposed
)
381 systrace_sysent_t
*ssysent
= *interposed
; /* Avoid sysent shadow warning
382 from bsd/sys/sysent.h */
385 if (ssysent
== NULL
) {
386 *interposed
= ssysent
= kmem_zalloc(sizeof (systrace_sysent_t
) *
390 for (i
= 0; i
< NSYSCALL
; i
++) {
391 struct sysent
*a
= &actual
[i
];
392 systrace_sysent_t
*s
= &ssysent
[i
];
394 if (LOADABLE_SYSCALL(a
) && !LOADED_SYSCALL(a
))
397 if (a
->sy_callc
== dtrace_systrace_syscall
)
400 #ifdef _SYSCALL32_IMPL
401 if (a
->sy_callc
== dtrace_systrace_syscall32
)
405 s
->stsy_underlying
= a
->sy_callc
;
406 s
->stsy_return_type
= a
->sy_return_type
;
408 lck_mtx_init(&dtrace_systrace_lock
, dtrace_lck_grp
, dtrace_lck_attr
);
411 #endif /* __APPLE__ */
415 systrace_provide(void *arg
, const dtrace_probedesc_t
*desc
)
417 #pragma unused(arg) /* __APPLE__ */
423 systrace_init(sysent
, &systrace_sysent
);
424 #ifdef _SYSCALL32_IMPL
425 systrace_init(sysent32
, &systrace_sysent32
);
428 for (i
= 0; i
< NSYSCALL
; i
++) {
429 if (systrace_sysent
[i
].stsy_underlying
== NULL
)
432 if (dtrace_probe_lookup(systrace_id
, NULL
,
433 syscallnames
[i
], "entry") != 0)
436 (void) dtrace_probe_create(systrace_id
, NULL
, syscallnames
[i
],
437 "entry", SYSTRACE_ARTIFICIAL_FRAMES
,
438 (void *)((uintptr_t)SYSTRACE_ENTRY(i
)));
439 (void) dtrace_probe_create(systrace_id
, NULL
, syscallnames
[i
],
440 "return", SYSTRACE_ARTIFICIAL_FRAMES
,
441 (void *)((uintptr_t)SYSTRACE_RETURN(i
)));
443 systrace_sysent
[i
].stsy_entry
= DTRACE_IDNONE
;
444 systrace_sysent
[i
].stsy_return
= DTRACE_IDNONE
;
445 #ifdef _SYSCALL32_IMPL
446 systrace_sysent32
[i
].stsy_entry
= DTRACE_IDNONE
;
447 systrace_sysent32
[i
].stsy_return
= DTRACE_IDNONE
;
451 #if defined(__APPLE__)
457 systrace_destroy(void *arg
, dtrace_id_t id
, void *parg
)
459 #pragma unused(arg,id) /* __APPLE__ */
461 int sysnum
= SYSTRACE_SYSNUM((uintptr_t)parg
);
463 #pragma unused(sysnum) /* __APPLE__ */
465 * There's nothing to do here but assert that we have actually been
468 if (SYSTRACE_ISENTRY((uintptr_t)parg
)) {
469 ASSERT(systrace_sysent
[sysnum
].stsy_entry
== DTRACE_IDNONE
);
470 #ifdef _SYSCALL32_IMPL
471 ASSERT(systrace_sysent32
[sysnum
].stsy_entry
== DTRACE_IDNONE
);
474 ASSERT(systrace_sysent
[sysnum
].stsy_return
== DTRACE_IDNONE
);
475 #ifdef _SYSCALL32_IMPL
476 ASSERT(systrace_sysent32
[sysnum
].stsy_return
== DTRACE_IDNONE
);
/*
 * Provider "enable" callback for the syscall provider.
 *
 * arg  - unused.
 * id   - DTrace probe id being enabled; stored in the table slot so the
 *        interposed handler can fire it.
 * parg - value built by SYSTRACE_ENTRY()/SYSTRACE_RETURN(); identifies
 *        the system call number and whether this is the entry or the
 *        return probe.
 *
 * After recording the id, the live sysent[] slot is redirected to
 * dtrace_systrace_syscall if it still holds the original handler.
 */
483 systrace_enable(void *arg
, dtrace_id_t id
, void *parg
)
485 #pragma unused(arg) /* __APPLE__ */
487 int sysnum
= SYSTRACE_SYSNUM((uintptr_t)parg
);
/*
 * Sampled before this probe's id is stored: true when a probe of this
 * system call was already enabled prior to this call.
 * NOTE(review): the statement that consumes `enabled` is not present in
 * this view.
 */
488 int enabled
= (systrace_sysent
[sysnum
].stsy_entry
!= DTRACE_IDNONE
||
489 systrace_sysent
[sysnum
].stsy_return
!= DTRACE_IDNONE
);
/* Record the probe id in the entry or the return slot, as encoded in parg. */
491 if (SYSTRACE_ISENTRY((uintptr_t)parg
)) {
492 systrace_sysent
[sysnum
].stsy_entry
= id
;
493 #ifdef _SYSCALL32_IMPL
494 systrace_sysent32
[sysnum
].stsy_entry
= id
;
497 systrace_sysent
[sysnum
].stsy_return
= id
;
498 #ifdef _SYSCALL32_IMPL
499 systrace_sysent32
[sysnum
].stsy_return
= id
;
504 ASSERT(sysent
[sysnum
].sy_callc
== dtrace_systrace_syscall
);
507 #ifdef _SYSCALL32_IMPL
508 (void) casptr(&sysent32
[sysnum
].sy_callc
,
509 (void *)systrace_sysent32
[sysnum
].stsy_underlying
,
510 (void *)dtrace_systrace_syscall32
);
/*
 * Swap the handler while holding the probe state lock, and only if the
 * slot still holds the original handler saved in stsy_underlying.  The
 * new pointer is written with ml_nofault_copy() instead of a plain store.
 * NOTE(review): presumably because sysent[] cannot be written through an
 * ordinary assignment here -- confirm.
 */
513 lck_mtx_lock(&dtrace_systrace_lock
);
514 if (sysent
[sysnum
].sy_callc
== systrace_sysent
[sysnum
].stsy_underlying
) {
515 vm_offset_t dss
= (vm_offset_t
)&dtrace_systrace_syscall
;
516 ml_nofault_copy((vm_offset_t
)&dss
, (vm_offset_t
)&sysent
[sysnum
].sy_callc
, sizeof(vm_offset_t
));
518 lck_mtx_unlock(&dtrace_systrace_lock
);
/*
 * Provider "disable" callback for the syscall provider.
 *
 * arg, id - unused.
 * parg    - value built by SYSTRACE_ENTRY()/SYSTRACE_RETURN(); identifies
 *           the system call number and whether the entry or the return
 *           probe is being turned off.
 *
 * Puts the saved original handler back into the live sysent[] slot and
 * resets the corresponding probe id to DTRACE_IDNONE.
 */
524 systrace_disable(void *arg
, dtrace_id_t id
, void *parg
)
526 #pragma unused(arg,id) /* __APPLE__ */
528 int sysnum
= SYSTRACE_SYSNUM((uintptr_t)parg
);
/*
 * True when at least one of the two probes of this system call is
 * already off at the time of the call.
 * NOTE(review): presumably gates the handler restore below; the guarding
 * statement is not present in this view.
 */
529 int disable
= (systrace_sysent
[sysnum
].stsy_entry
== DTRACE_IDNONE
||
530 systrace_sysent
[sysnum
].stsy_return
== DTRACE_IDNONE
);
/*
 * Under the probe state lock: if the slot currently points at the
 * interposed handler, copy the saved original handler back over it.
 */
533 lck_mtx_lock(&dtrace_systrace_lock
);
534 if (sysent
[sysnum
].sy_callc
== dtrace_systrace_syscall
)
535 ml_nofault_copy((vm_offset_t
)&systrace_sysent
[sysnum
].stsy_underlying
, (vm_offset_t
)&sysent
[sysnum
].sy_callc
, sizeof(systrace_sysent
[sysnum
].stsy_underlying
));
536 lck_mtx_unlock(&dtrace_systrace_lock
);
538 #ifdef _SYSCALL32_IMPL
539 (void) casptr(&sysent32
[sysnum
].sy_callc
,
540 (void *)dtrace_systrace_syscall32
,
541 (void *)systrace_sysent32
[sysnum
].stsy_underlying
);
/* Forget the probe id for whichever of entry/return parg names. */
545 if (SYSTRACE_ISENTRY((uintptr_t)parg
)) {
546 systrace_sysent
[sysnum
].stsy_entry
= DTRACE_IDNONE
;
547 #ifdef _SYSCALL32_IMPL
548 systrace_sysent32
[sysnum
].stsy_entry
= DTRACE_IDNONE
;
551 systrace_sysent
[sysnum
].stsy_return
= DTRACE_IDNONE
;
552 #ifdef _SYSCALL32_IMPL
553 systrace_sysent32
[sysnum
].stsy_return
= DTRACE_IDNONE
;
558 static dtrace_pattr_t systrace_attr
= {
559 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
560 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
561 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_ISA
},
562 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
563 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_ISA
},
566 static dtrace_pops_t systrace_pops
= {
580 systrace_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
586 return (DDI_SUCCESS
);
588 return (DDI_FAILURE
);
591 #if !defined(__APPLE__)
592 systrace_probe
= (void (*)())dtrace_probe
;
595 if (ddi_create_minor_node(devi
, "systrace", S_IFCHR
, 0,
596 DDI_PSEUDO
, NULL
) == DDI_FAILURE
||
597 dtrace_register("syscall", &systrace_attr
, DTRACE_PRIV_USER
, NULL
,
598 &systrace_pops
, NULL
, &systrace_id
) != 0) {
599 systrace_probe
= systrace_stub
;
600 ddi_remove_minor_node(devi
, NULL
);
601 return (DDI_FAILURE
);
604 systrace_probe
= (void(*))&dtrace_probe
;
607 if (ddi_create_minor_node(devi
, "systrace", S_IFCHR
, 0,
608 DDI_PSEUDO
, 0) == DDI_FAILURE
||
609 dtrace_register("syscall", &systrace_attr
, DTRACE_PRIV_USER
, NULL
,
610 &systrace_pops
, NULL
, &systrace_id
) != 0) {
611 systrace_probe
= systrace_stub
;
612 ddi_remove_minor_node(devi
, NULL
);
613 return (DDI_FAILURE
);
615 #endif /* __APPLE__ */
617 ddi_report_dev(devi
);
618 systrace_devi
= devi
;
620 return (DDI_SUCCESS
);
623 #if !defined(__APPLE__)
625 systrace_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
631 return (DDI_SUCCESS
);
633 return (DDI_FAILURE
);
636 if (dtrace_unregister(systrace_id
) != 0)
637 return (DDI_FAILURE
);
639 ddi_remove_minor_node(devi
, NULL
);
640 systrace_probe
= systrace_stub
;
641 return (DDI_SUCCESS
);
646 systrace_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
651 case DDI_INFO_DEVT2DEVINFO
:
652 *result
= (void *)systrace_devi
;
655 case DDI_INFO_DEVT2INSTANCE
:
667 systrace_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
672 static struct cb_ops systrace_cb_ops
= {
673 systrace_open
, /* open */
675 nulldev
, /* strategy */
685 ddi_prop_op
, /* cb_prop_op */
687 D_NEW
| D_MP
/* Driver compatibility flag */
690 static struct dev_ops systrace_ops
= {
691 DEVO_REV
, /* devo_rev, */
693 systrace_info
, /* get_dev_info */
694 nulldev
, /* identify */
696 systrace_attach
, /* attach */
697 systrace_detach
, /* detach */
699 &systrace_cb_ops
, /* driver operations */
700 NULL
, /* bus operations */
701 nodev
/* dev power */
705 * Module linkage information for the kernel.
707 static struct modldrv modldrv
= {
708 &mod_driverops
, /* module type (this is a pseudo driver) */
709 "System Call Tracing", /* name of module */
710 &systrace_ops
, /* driver ops */
713 static struct modlinkage modlinkage
= {
722 return (mod_install(&modlinkage
));
726 _info(struct modinfo
*modinfop
)
728 return (mod_info(&modlinkage
, modinfop
));
734 return (mod_remove(&modlinkage
));
737 typedef kern_return_t (*mach_call_t
)(void *);
739 /* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
740 typedef void mach_munge_t(const void *, void *);
743 int mach_trap_arg_count
;
744 kern_return_t (*mach_trap_function
)(void *);
745 #if 0 /* no active architectures use mungers for mach traps */
746 mach_munge_t
*mach_trap_arg_munge32
; /* system call arguments for 32-bit */
747 mach_munge_t
*mach_trap_arg_munge64
; /* system call arguments for 64-bit */
750 const char* mach_trap_name
;
751 #endif /* MACH_ASSERT */
754 extern mach_trap_t mach_trap_table
[];
755 extern int mach_trap_count
;
757 extern const char *mach_syscall_name_table
[];
759 /* XXX From osfmk/i386/bsd_i386.c */
760 struct mach_call_args
{
773 #define NSYSCALL mach_trap_count
775 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
776 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
/*
 * Per-trap bookkeeping for the mach_trap provider.  machtrace_init()
 * fills one element per slot of mach_trap_table, pairing element i with
 * trap i.
 */
779 typedef struct machtrace_sysent
{
/* Id of the enabled "entry" probe for this trap, or DTRACE_IDNONE. */
780 dtrace_id_t stsy_entry
;
/* Id of the enabled "return" probe for this trap, or DTRACE_IDNONE. */
781 dtrace_id_t stsy_return
;
/*
 * Original mach_trap_function saved by machtrace_init(); the interposed
 * handler calls through it, and machtrace_disable() copies it back into
 * mach_trap_table.
 */
782 kern_return_t (*stsy_underlying
)(void *);
/* NOTE(review): no use of this field for Mach traps is visible in this view. */
783 int32_t stsy_return_type
;
784 } machtrace_sysent_t
;
786 static machtrace_sysent_t
*machtrace_sysent
= NULL
;
788 void (*machtrace_probe
)(dtrace_id_t
, uint64_t, uint64_t,
789 uint64_t, uint64_t, uint64_t);
791 static uint64_t machtrace_getarg(void *, dtrace_id_t
, void *, int, int);
793 static dev_info_t
*machtrace_devi
;
794 static dtrace_provider_id_t machtrace_id
;
797 dtrace_machtrace_syscall(struct mach_call_args
*args
)
802 machtrace_sysent_t
*sy
;
808 syscall_arg_t
*ip
= (syscall_arg_t
*)args
;
809 mach_call_t mach_call
;
811 #if defined(__i386__) || defined (__x86_64__)
812 #pragma unused(flavor)
814 pal_register_cache_state(current_thread(), VALID
);
815 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
817 if (is_saved_state64(tagged_regs
)) {
818 code
= saved_state64(tagged_regs
)->rax
& SYSCALL_NUMBER_MASK
;
820 code
= -saved_state32(tagged_regs
)->eax
;
824 #error Unknown Architecture
827 sy
= &machtrace_sysent
[code
];
829 if ((id
= sy
->stsy_entry
) != DTRACE_IDNONE
) {
830 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
833 uthread
->t_dtrace_syscall_args
= (void *)ip
;
835 (*machtrace_probe
)(id
, *ip
, *(ip
+1), *(ip
+2), *(ip
+3), *(ip
+4));
838 uthread
->t_dtrace_syscall_args
= (void *)0;
843 * We want to explicitly allow DTrace consumers to stop a process
844 * before it actually executes the meat of the syscall.
846 p
= ttoproc(curthread
);
847 mutex_enter(&p
->p_lock
);
848 if (curthread
->t_dtrace_stop
&& !curthread
->t_lwp
->lwp_nostop
) {
849 curthread
->t_dtrace_stop
= 0;
850 stop(PR_REQUESTED
, 0);
852 mutex_exit(&p
->p_lock
);
855 mach_call
= (mach_call_t
)(*sy
->stsy_underlying
);
856 rval
= mach_call(args
);
858 if ((id
= sy
->stsy_return
) != DTRACE_IDNONE
)
859 (*machtrace_probe
)(id
, (uint64_t)rval
, 0, 0, 0, 0);
865 machtrace_init(mach_trap_t
*actual
, machtrace_sysent_t
**interposed
)
867 machtrace_sysent_t
*msysent
= *interposed
;
870 if (msysent
== NULL
) {
871 *interposed
= msysent
= kmem_zalloc(sizeof (machtrace_sysent_t
) *
875 for (i
= 0; i
< NSYSCALL
; i
++) {
876 mach_trap_t
*a
= &actual
[i
];
877 machtrace_sysent_t
*s
= &msysent
[i
];
879 if (LOADABLE_SYSCALL(a
) && !LOADED_SYSCALL(a
))
882 if (a
->mach_trap_function
== (mach_call_t
)(dtrace_machtrace_syscall
))
885 s
->stsy_underlying
= a
->mach_trap_function
;
891 machtrace_provide(void *arg
, const dtrace_probedesc_t
*desc
)
893 #pragma unused(arg) /* __APPLE__ */
900 machtrace_init(mach_trap_table
, &machtrace_sysent
);
902 for (i
= 0; i
< NSYSCALL
; i
++) {
904 if (machtrace_sysent
[i
].stsy_underlying
== NULL
)
907 if (dtrace_probe_lookup(machtrace_id
, NULL
,
908 mach_syscall_name_table
[i
], "entry") != 0)
911 (void) dtrace_probe_create(machtrace_id
, NULL
, mach_syscall_name_table
[i
],
912 "entry", MACHTRACE_ARTIFICIAL_FRAMES
,
913 (void *)((uintptr_t)SYSTRACE_ENTRY(i
)));
914 (void) dtrace_probe_create(machtrace_id
, NULL
, mach_syscall_name_table
[i
],
915 "return", MACHTRACE_ARTIFICIAL_FRAMES
,
916 (void *)((uintptr_t)SYSTRACE_RETURN(i
)));
918 machtrace_sysent
[i
].stsy_entry
= DTRACE_IDNONE
;
919 machtrace_sysent
[i
].stsy_return
= DTRACE_IDNONE
;
925 machtrace_destroy(void *arg
, dtrace_id_t id
, void *parg
)
927 #pragma unused(arg,id) /* __APPLE__ */
928 int sysnum
= SYSTRACE_SYSNUM((uintptr_t)parg
);
930 #pragma unused(sysnum) /* __APPLE__ */
933 * There's nothing to do here but assert that we have actually been
936 if (SYSTRACE_ISENTRY((uintptr_t)parg
)) {
937 ASSERT(machtrace_sysent
[sysnum
].stsy_entry
== DTRACE_IDNONE
);
939 ASSERT(machtrace_sysent
[sysnum
].stsy_return
== DTRACE_IDNONE
);
945 machtrace_enable(void *arg
, dtrace_id_t id
, void *parg
)
947 #pragma unused(arg) /* __APPLE__ */
949 int sysnum
= SYSTRACE_SYSNUM((uintptr_t)parg
);
950 int enabled
= (machtrace_sysent
[sysnum
].stsy_entry
!= DTRACE_IDNONE
||
951 machtrace_sysent
[sysnum
].stsy_return
!= DTRACE_IDNONE
);
953 if (SYSTRACE_ISENTRY((uintptr_t)parg
)) {
954 machtrace_sysent
[sysnum
].stsy_entry
= id
;
956 machtrace_sysent
[sysnum
].stsy_return
= id
;
960 ASSERT(mach_trap_table
[sysnum
].mach_trap_function
== (void *)dtrace_machtrace_syscall
);
964 lck_mtx_lock(&dtrace_systrace_lock
);
966 if (mach_trap_table
[sysnum
].mach_trap_function
== machtrace_sysent
[sysnum
].stsy_underlying
) {
967 vm_offset_t dss
= (vm_offset_t
)&dtrace_machtrace_syscall
;
968 ml_nofault_copy((vm_offset_t
)&dss
, (vm_offset_t
)&mach_trap_table
[sysnum
].mach_trap_function
, sizeof(vm_offset_t
));
971 lck_mtx_unlock(&dtrace_systrace_lock
);
978 machtrace_disable(void *arg
, dtrace_id_t id
, void *parg
)
980 #pragma unused(arg,id) /* __APPLE__ */
982 int sysnum
= SYSTRACE_SYSNUM((uintptr_t)parg
);
983 int disable
= (machtrace_sysent
[sysnum
].stsy_entry
== DTRACE_IDNONE
||
984 machtrace_sysent
[sysnum
].stsy_return
== DTRACE_IDNONE
);
988 lck_mtx_lock(&dtrace_systrace_lock
);
990 if (mach_trap_table
[sysnum
].mach_trap_function
== (mach_call_t
)dtrace_machtrace_syscall
) {
991 ml_nofault_copy((vm_offset_t
)&machtrace_sysent
[sysnum
].stsy_underlying
, (vm_offset_t
)&mach_trap_table
[sysnum
].mach_trap_function
, sizeof(vm_offset_t
));
993 lck_mtx_unlock(&dtrace_systrace_lock
);
996 if (SYSTRACE_ISENTRY((uintptr_t)parg
)) {
997 machtrace_sysent
[sysnum
].stsy_entry
= DTRACE_IDNONE
;
999 machtrace_sysent
[sysnum
].stsy_return
= DTRACE_IDNONE
;
1003 static dtrace_pattr_t machtrace_attr
= {
1004 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
1005 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
1006 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_ISA
},
1007 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
1008 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_ISA
},
1011 static dtrace_pops_t machtrace_pops
= {
1025 machtrace_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
1031 return (DDI_SUCCESS
);
1033 return (DDI_FAILURE
);
1036 #if !defined(__APPLE__)
1037 machtrace_probe
= (void (*)())dtrace_probe
;
1040 if (ddi_create_minor_node(devi
, "machtrace", S_IFCHR
, 0,
1041 DDI_PSEUDO
, NULL
) == DDI_FAILURE
||
1042 dtrace_register("mach_trap", &machtrace_attr
, DTRACE_PRIV_USER
, NULL
,
1043 &machtrace_pops
, NULL
, &machtrace_id
) != 0) {
1044 machtrace_probe
= systrace_stub
;
1046 machtrace_probe
= dtrace_probe
;
1049 if (ddi_create_minor_node(devi
, "machtrace", S_IFCHR
, 0,
1050 DDI_PSEUDO
, 0) == DDI_FAILURE
||
1051 dtrace_register("mach_trap", &machtrace_attr
, DTRACE_PRIV_USER
, NULL
,
1052 &machtrace_pops
, NULL
, &machtrace_id
) != 0) {
1053 machtrace_probe
= (void (*))&systrace_stub
;
1054 #endif /* __APPLE__ */
1055 ddi_remove_minor_node(devi
, NULL
);
1056 return (DDI_FAILURE
);
1059 ddi_report_dev(devi
);
1060 machtrace_devi
= devi
;
1062 return (DDI_SUCCESS
);
1065 d_open_t _systrace_open
;
1067 int _systrace_open(dev_t dev
, int flags
, int devtype
, struct proc
*p
)
1069 #pragma unused(dev,flags,devtype,p)
1073 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1076 * A struct describing which functions will get invoked for certain
1079 static struct cdevsw systrace_cdevsw
=
1081 _systrace_open
, /* open */
1082 eno_opcl
, /* close */
1083 eno_rdwrt
, /* read */
1084 eno_rdwrt
, /* write */
1085 eno_ioctl
, /* ioctl */
1086 (stop_fcn_t
*)nulldev
, /* stop */
1087 (reset_fcn_t
*)nulldev
, /* reset */
1089 eno_select
, /* select */
1090 eno_mmap
, /* mmap */
1091 eno_strat
, /* strategy */
1092 eno_getc
, /* getc */
1093 eno_putc
, /* putc */
1097 static int gSysTraceInited
= 0;
1099 void systrace_init( void );
1101 void systrace_init( void )
1103 if (0 == gSysTraceInited
) {
1104 int majdevno
= cdevsw_add(SYSTRACE_MAJOR
, &systrace_cdevsw
);
1107 printf("systrace_init: failed to allocate a major number!\n");
1108 gSysTraceInited
= 0;
1112 systrace_attach( (dev_info_t
*)(uintptr_t)majdevno
, DDI_ATTACH
);
1113 machtrace_attach( (dev_info_t
*)(uintptr_t)majdevno
, DDI_ATTACH
);
1115 gSysTraceInited
= 1;
1117 panic("systrace_init: called twice!\n");
1119 #undef SYSTRACE_MAJOR
1120 #endif /* __APPLE__ */
/*
 * Argument fetch routine for the syscall provider (declared earlier in
 * the file as returning uint64_t).
 *
 * arg, id, parg, aframes - unused.
 * argno                  - index of the system call argument to fetch.
 *
 * Reads element `argno` of the argument vector that the interposed
 * system call handler stored in the current thread's
 * t_dtrace_syscall_args before firing the entry probe.
 */
1123 systrace_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
, int aframes
)
1125 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1127 syscall_arg_t
*stack
= (syscall_arg_t
*)NULL
;
1129 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
/* Pick up the argument vector saved on the thread for the duration of the probe. */
1132 stack
= (syscall_arg_t
*)uthread
->t_dtrace_syscall_args
;
/*
 * The load is bracketed by CPU_DTRACE_NOFAULT.
 * NOTE(review): presumably so a bad pointer is reported as a DTrace
 * fault instead of taking the kernel down -- confirm against the
 * DTrace implementation headers.
 */
1137 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
1138 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1139 val
= (uint64_t)*(stack
+argno
);
1140 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
1146 machtrace_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
, int aframes
)
1148 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1150 syscall_arg_t
*stack
= (syscall_arg_t
*)NULL
;
1152 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
1155 stack
= (syscall_arg_t
*)uthread
->t_dtrace_syscall_args
;
1160 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
1161 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1162 val
= (uint64_t)*(stack
+argno
);
1163 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);