]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/dtrace/systrace.c
xnu-2050.48.11.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
6d2010ae 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2d21ac55
A
23 * Use is subject to license terms.
24 */
25
b0d623f7 26/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
2d21ac55
A
27
28#if !defined(__APPLE__)
29#include <sys/dtrace.h>
30#include <sys/systrace.h>
31#include <sys/stat.h>
32#include <sys/systm.h>
33#include <sys/conf.h>
34#include <sys/ddi.h>
35#include <sys/sunddi.h>
36#include <sys/atomic.h>
37#define SYSTRACE_ARTIFICIAL_FRAMES 1
38#else
39
40#ifdef KERNEL
41#ifndef _KERNEL
42#define _KERNEL /* Solaris vs. Darwin */
43#endif
44#endif
45
46#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47#include <kern/thread.h>
48#include <mach/thread_status.h>
49/* XXX All of these should really be derived from syscall_sw.h */
50#if defined(__i386__) || defined (__x86_64__)
51#define SYSCALL_CLASS_SHIFT 24
52#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
53#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
54#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
55
56typedef x86_saved_state_t savearea_t;
2d21ac55
A
57#endif
58
59#include <sys/param.h>
60#include <sys/systm.h>
61#include <sys/proc.h>
62#include <sys/errno.h>
63#include <sys/ioctl.h>
64#include <sys/conf.h>
65#include <sys/fcntl.h>
66#include <miscfs/devfs/devfs.h>
67
68#include <sys/dtrace.h>
69#include <sys/dtrace_impl.h>
70#include "systrace.h"
71#include <sys/stat.h>
72#include <sys/systm.h>
73#include <sys/conf.h>
74#include <sys/user.h>
75
6d2010ae
A
76#include <machine/pal_routines.h>
77
78#if defined(__i386__) || defined (__x86_64__)
2d21ac55
A
79#define SYSTRACE_ARTIFICIAL_FRAMES 2
80#define MACHTRACE_ARTIFICIAL_FRAMES 3
2d21ac55
A
81#else
82#error Unknown Architecture
83#endif
84
85#include <sys/sysent.h>
86#define sy_callc sy_call /* Map Solaris slot name to Darwin's */
87#define NSYSCALL nsysent /* and is less than 500 or so */
88
89extern const char *syscallnames[];
90
91#include <sys/dtrace_glue.h>
92#define casptr dtrace_casptr
93#define membar_enter dtrace_membar_producer
94
95#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
96#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
97
316670eb
A
98extern lck_attr_t* dtrace_lck_attr;
99extern lck_grp_t* dtrace_lck_grp;
100static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
101
2d21ac55 102systrace_sysent_t *systrace_sysent = NULL;
316670eb
A
103void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
104
105static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int);
2d21ac55
A
106
107void
108systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
316670eb 109 uint64_t arg2, uint64_t arg3, uint64_t arg4)
2d21ac55 110{
316670eb 111#pragma unused(id,arg0,arg1,arg2,arg3,arg4)
2d21ac55
A
112}
113
114int32_t
115dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
116{
117 boolean_t flavor;
118 unsigned short code;
119
120 systrace_sysent_t *sy;
121 dtrace_id_t id;
122 int32_t rval;
123#if 0 /* XXX */
124 proc_t *p;
125#endif
126 syscall_arg_t *ip = (syscall_arg_t *)uap;
127
6d2010ae 128#if defined(__i386__) || defined (__x86_64__)
2d21ac55
A
129#pragma unused(flavor)
130 {
6d2010ae 131 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
132 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
133
134 if (is_saved_state64(tagged_regs)) {
135 x86_saved_state64_t *regs = saved_state64(tagged_regs);
136 code = regs->rax & SYSCALL_NUMBER_MASK;
137 /*
138 * Check for indirect system call... system call number
139 * passed as 'arg0'
140 */
141 if (code == 0) {
142 code = regs->rdi;
143 }
144 } else {
145 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
b0d623f7
A
146
147 if (code == 0) {
148 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
149 code = fuword(params);
150 }
2d21ac55
A
151 }
152 }
2d21ac55
A
153#else
154#error Unknown Architecture
155#endif
156
157 // Bounds "check" the value of code a la unix_syscall
158 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
159
c910b4d9 160 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
316670eb
A
161 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
162 if (uthread)
163 uthread->t_dtrace_syscall_args = (void *)ip;
164
c910b4d9 165 if (ip)
316670eb 166 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
c910b4d9 167 else
316670eb
A
168 (*systrace_probe)(id, 0, 0, 0, 0, 0);
169
170 if (uthread)
171 uthread->t_dtrace_syscall_args = (void *)0;
c910b4d9 172 }
2d21ac55
A
173
174#if 0 /* XXX */
175 /*
176 * We want to explicitly allow DTrace consumers to stop a process
177 * before it actually executes the meat of the syscall.
178 */
179 p = ttoproc(curthread);
180 mutex_enter(&p->p_lock);
181 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
182 curthread->t_dtrace_stop = 0;
183 stop(PR_REQUESTED, 0);
184 }
185 mutex_exit(&p->p_lock);
186#endif
187
188 rval = (*sy->stsy_underlying)(pp, uap, rv);
189
190 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 191 uint64_t munged_rv0, munged_rv1;
2d21ac55
A
192 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
193
194 if (uthread)
195 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
196
197 /*
198 * "Decode" rv for use in the call to dtrace_probe()
199 */
200 if (rval == ERESTART) {
b0d623f7
A
201 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
202 munged_rv1 = -1LL;
2d21ac55
A
203 } else if (rval != EJUSTRETURN) {
204 if (rval) {
b0d623f7
A
205 munged_rv0 = -1LL; /* Mimic what libc will do. */
206 munged_rv1 = -1LL;
2d21ac55
A
207 } else {
208 switch (sy->stsy_return_type) {
209 case _SYSCALL_RET_INT_T:
b0d623f7
A
210 munged_rv0 = rv[0];
211 munged_rv1 = rv[1];
2d21ac55
A
212 break;
213 case _SYSCALL_RET_UINT_T:
b0d623f7
A
214 munged_rv0 = ((u_int)rv[0]);
215 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
216 break;
217 case _SYSCALL_RET_OFF_T:
d1ecb069 218 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
219 munged_rv0 = *(u_int64_t *)rv;
220 munged_rv1 = 0LL;
2d21ac55
A
221 break;
222 case _SYSCALL_RET_ADDR_T:
223 case _SYSCALL_RET_SIZE_T:
224 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
225 munged_rv0 = *(user_addr_t *)rv;
226 munged_rv1 = 0LL;
2d21ac55
A
227 break;
228 case _SYSCALL_RET_NONE:
b0d623f7
A
229 munged_rv0 = 0LL;
230 munged_rv1 = 0LL;
2d21ac55
A
231 break;
232 default:
b0d623f7
A
233 munged_rv0 = 0LL;
234 munged_rv1 = 0LL;
2d21ac55
A
235 break;
236 }
237 }
b0d623f7
A
238 } else {
239 munged_rv0 = 0LL;
240 munged_rv1 = 0LL;
241 }
2d21ac55 242
b0d623f7
A
243 /*
244 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
245 *
246 * "This is a bit of an historical artifact. At first, the syscall provider just
247 * had its return value in arg0, and the fbt and pid providers had their return
248 * values in arg1 (so that we could use arg0 for the offset of the return site).
249 *
250 * We inevitably started writing scripts where we wanted to see the return
251 * values from probes in all three providers, and we made this script easier
252 * to write by replicating the syscall return values in arg1 to match fbt and
253 * pid. We debated briefly about removing the return value from arg0, but
254 * decided that it would be less confusing to have the same data in two places
255 * than to have some non-helpful, non-intuitive value in arg0.
256 *
257 * This change was made 4/23/2003 according to the DTrace project's putback log."
258 */
316670eb 259 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
2d21ac55
A
260 }
261
262 return (rval);
263}
264
265void
266dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
267{
268 systrace_sysent_t *sy;
269 dtrace_id_t id;
270
271 // Bounds "check" the value of code a la unix_syscall_return
272 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
273
274 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 275 uint64_t munged_rv0, munged_rv1;
2d21ac55
A
276 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
277
278 if (uthread)
279 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
280
281 /*
282 * "Decode" rv for use in the call to dtrace_probe()
283 */
284 if (rval == ERESTART) {
b0d623f7
A
285 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
286 munged_rv1 = -1LL;
2d21ac55
A
287 } else if (rval != EJUSTRETURN) {
288 if (rval) {
b0d623f7
A
289 munged_rv0 = -1LL; /* Mimic what libc will do. */
290 munged_rv1 = -1LL;
2d21ac55
A
291 } else {
292 switch (sy->stsy_return_type) {
293 case _SYSCALL_RET_INT_T:
b0d623f7
A
294 munged_rv0 = rv[0];
295 munged_rv1 = rv[1];
2d21ac55
A
296 break;
297 case _SYSCALL_RET_UINT_T:
b0d623f7
A
298 munged_rv0 = ((u_int)rv[0]);
299 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
300 break;
301 case _SYSCALL_RET_OFF_T:
d1ecb069 302 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
303 munged_rv0 = *(u_int64_t *)rv;
304 munged_rv1 = 0LL;
2d21ac55
A
305 break;
306 case _SYSCALL_RET_ADDR_T:
307 case _SYSCALL_RET_SIZE_T:
308 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
309 munged_rv0 = *(user_addr_t *)rv;
310 munged_rv1 = 0LL;
2d21ac55
A
311 break;
312 case _SYSCALL_RET_NONE:
b0d623f7
A
313 munged_rv0 = 0LL;
314 munged_rv1 = 0LL;
2d21ac55
A
315 break;
316 default:
b0d623f7
A
317 munged_rv0 = 0LL;
318 munged_rv1 = 0LL;
2d21ac55
A
319 break;
320 }
321 }
b0d623f7
A
322 } else {
323 munged_rv0 = 0LL;
324 munged_rv1 = 0LL;
325 }
2d21ac55 326
316670eb 327 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
2d21ac55
A
328 }
329}
330#endif /* __APPLE__ */
331
/*
 * Encoding of a probe's private argument (parg): the low SYSTRACE_SHIFT
 * bits carry the syscall number, and the bit just above them is set for
 * an "entry" probe and clear for a "return" probe.
 */
#define SYSTRACE_SHIFT		16
#define SYSTRACE_ISENTRY(x)	((int)(x) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x)	((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define SYSTRACE_ENTRY(id)	((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id)	(id)
337
338#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
339#error 1 << SYSTRACE_SHIFT must exceed number of system calls
340#endif
341
342static dev_info_t *systrace_devi;
343static dtrace_provider_id_t systrace_id;
344
b0d623f7 345#if !defined (__APPLE__)
2d21ac55
A
346static void
347systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
348{
349 systrace_sysent_t *sysent = *interposed;
350 int i;
351
352 if (sysent == NULL) {
353 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
354 NSYSCALL, KM_SLEEP);
355 }
356
357 for (i = 0; i < NSYSCALL; i++) {
358 struct sysent *a = &actual[i];
359 systrace_sysent_t *s = &sysent[i];
360
361 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
362 continue;
363
364 if (a->sy_callc == dtrace_systrace_syscall)
365 continue;
366
367#ifdef _SYSCALL32_IMPL
368 if (a->sy_callc == dtrace_systrace_syscall32)
369 continue;
370#endif
371
372 s->stsy_underlying = a->sy_callc;
b0d623f7
A
373 }
374}
375#else
376#define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
377static void
378systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
379{
380
381 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
382 from bsd/sys/sysent.h */
383 int i;
384
385 if (ssysent == NULL) {
386 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
387 NSYSCALL, KM_SLEEP);
388 }
389
390 for (i = 0; i < NSYSCALL; i++) {
391 struct sysent *a = &actual[i];
392 systrace_sysent_t *s = &ssysent[i];
393
394 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
395 continue;
396
397 if (a->sy_callc == dtrace_systrace_syscall)
398 continue;
399
400#ifdef _SYSCALL32_IMPL
401 if (a->sy_callc == dtrace_systrace_syscall32)
402 continue;
2d21ac55 403#endif
b0d623f7
A
404
405 s->stsy_underlying = a->sy_callc;
406 s->stsy_return_type = a->sy_return_type;
2d21ac55 407 }
316670eb 408 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
2d21ac55
A
409}
410
b0d623f7
A
411#endif /* __APPLE__ */
412
2d21ac55
A
413/*ARGSUSED*/
414static void
415systrace_provide(void *arg, const dtrace_probedesc_t *desc)
416{
b0d623f7 417#pragma unused(arg) /* __APPLE__ */
2d21ac55
A
418 int i;
419
420 if (desc != NULL)
421 return;
422
423 systrace_init(sysent, &systrace_sysent);
424#ifdef _SYSCALL32_IMPL
425 systrace_init(sysent32, &systrace_sysent32);
426#endif
427
428 for (i = 0; i < NSYSCALL; i++) {
429 if (systrace_sysent[i].stsy_underlying == NULL)
430 continue;
431
432 if (dtrace_probe_lookup(systrace_id, NULL,
433 syscallnames[i], "entry") != 0)
434 continue;
435
436 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
437 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
438 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
439 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
440 "return", SYSTRACE_ARTIFICIAL_FRAMES,
441 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
442
443 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
444 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
445#ifdef _SYSCALL32_IMPL
446 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
447 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
448#endif
449 }
450}
451#if defined(__APPLE__)
452#undef systrace_init
453#endif
454
455/*ARGSUSED*/
456static void
457systrace_destroy(void *arg, dtrace_id_t id, void *parg)
458{
b0d623f7
A
459#pragma unused(arg,id) /* __APPLE__ */
460
2d21ac55
A
461 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
462
b0d623f7 463#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
464 /*
465 * There's nothing to do here but assert that we have actually been
466 * disabled.
467 */
468 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
469 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
470#ifdef _SYSCALL32_IMPL
471 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
472#endif
473 } else {
474 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
475#ifdef _SYSCALL32_IMPL
476 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
477#endif
478 }
479}
480
481/*ARGSUSED*/
6d2010ae 482static int
2d21ac55
A
483systrace_enable(void *arg, dtrace_id_t id, void *parg)
484{
b0d623f7
A
485#pragma unused(arg) /* __APPLE__ */
486
2d21ac55
A
487 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
488 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
489 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
490
491 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
492 systrace_sysent[sysnum].stsy_entry = id;
493#ifdef _SYSCALL32_IMPL
494 systrace_sysent32[sysnum].stsy_entry = id;
495#endif
496 } else {
497 systrace_sysent[sysnum].stsy_return = id;
498#ifdef _SYSCALL32_IMPL
499 systrace_sysent32[sysnum].stsy_return = id;
500#endif
501 }
502
503 if (enabled) {
504 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
6d2010ae 505 return(0);
2d21ac55 506 }
2d21ac55
A
507#ifdef _SYSCALL32_IMPL
508 (void) casptr(&sysent32[sysnum].sy_callc,
509 (void *)systrace_sysent32[sysnum].stsy_underlying,
510 (void *)dtrace_systrace_syscall32);
511#endif
316670eb
A
512
513 lck_mtx_lock(&dtrace_systrace_lock);
514 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
515 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
516 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
517 }
518 lck_mtx_unlock(&dtrace_systrace_lock);
6d2010ae 519 return (0);
2d21ac55
A
520}
521
522/*ARGSUSED*/
523static void
524systrace_disable(void *arg, dtrace_id_t id, void *parg)
525{
b0d623f7
A
526#pragma unused(arg,id) /* __APPLE__ */
527
2d21ac55
A
528 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
529 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
530 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
531
532 if (disable) {
316670eb
A
533 lck_mtx_lock(&dtrace_systrace_lock);
534 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
535 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
536 lck_mtx_unlock(&dtrace_systrace_lock);
2d21ac55
A
537
538#ifdef _SYSCALL32_IMPL
539 (void) casptr(&sysent32[sysnum].sy_callc,
540 (void *)dtrace_systrace_syscall32,
541 (void *)systrace_sysent32[sysnum].stsy_underlying);
542#endif
543 }
544
545 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
546 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
547#ifdef _SYSCALL32_IMPL
548 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
549#endif
550 } else {
551 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
552#ifdef _SYSCALL32_IMPL
553 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
554#endif
555 }
556}
557
558static dtrace_pattr_t systrace_attr = {
559{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
560{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
561{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
562{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
563{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
564};
565
566static dtrace_pops_t systrace_pops = {
567 systrace_provide,
568 NULL,
569 systrace_enable,
570 systrace_disable,
571 NULL,
572 NULL,
573 NULL,
316670eb 574 systrace_getarg,
2d21ac55
A
575 NULL,
576 systrace_destroy
577};
578
579static int
580systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
581{
582 switch (cmd) {
583 case DDI_ATTACH:
584 break;
585 case DDI_RESUME:
586 return (DDI_SUCCESS);
587 default:
588 return (DDI_FAILURE);
589 }
590
b0d623f7
A
591#if !defined(__APPLE__)
592 systrace_probe = (void (*)())dtrace_probe;
2d21ac55
A
593 membar_enter();
594
595 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
596 DDI_PSEUDO, NULL) == DDI_FAILURE ||
597 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
598 &systrace_pops, NULL, &systrace_id) != 0) {
599 systrace_probe = systrace_stub;
600 ddi_remove_minor_node(devi, NULL);
601 return (DDI_FAILURE);
602 }
b0d623f7
A
603#else
604 systrace_probe = (void(*))&dtrace_probe;
605 membar_enter();
606
607 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
608 DDI_PSEUDO, 0) == DDI_FAILURE ||
609 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
610 &systrace_pops, NULL, &systrace_id) != 0) {
611 systrace_probe = systrace_stub;
612 ddi_remove_minor_node(devi, NULL);
613 return (DDI_FAILURE);
614 }
615#endif /* __APPLE__ */
2d21ac55
A
616
617 ddi_report_dev(devi);
618 systrace_devi = devi;
619
620 return (DDI_SUCCESS);
621}
622
623#if !defined(__APPLE__)
624static int
625systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
626{
627 switch (cmd) {
628 case DDI_DETACH:
629 break;
630 case DDI_SUSPEND:
631 return (DDI_SUCCESS);
632 default:
633 return (DDI_FAILURE);
634 }
635
636 if (dtrace_unregister(systrace_id) != 0)
637 return (DDI_FAILURE);
638
639 ddi_remove_minor_node(devi, NULL);
640 systrace_probe = systrace_stub;
641 return (DDI_SUCCESS);
642}
643
644/*ARGSUSED*/
645static int
646systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
647{
648 int error;
649
650 switch (infocmd) {
651 case DDI_INFO_DEVT2DEVINFO:
652 *result = (void *)systrace_devi;
653 error = DDI_SUCCESS;
654 break;
655 case DDI_INFO_DEVT2INSTANCE:
656 *result = (void *)0;
657 error = DDI_SUCCESS;
658 break;
659 default:
660 error = DDI_FAILURE;
661 }
662 return (error);
663}
664
665/*ARGSUSED*/
666static int
667systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
668{
669 return (0);
670}
671
672static struct cb_ops systrace_cb_ops = {
673 systrace_open, /* open */
674 nodev, /* close */
675 nulldev, /* strategy */
676 nulldev, /* print */
677 nodev, /* dump */
678 nodev, /* read */
679 nodev, /* write */
680 nodev, /* ioctl */
681 nodev, /* devmap */
682 nodev, /* mmap */
683 nodev, /* segmap */
684 nochpoll, /* poll */
685 ddi_prop_op, /* cb_prop_op */
686 0, /* streamtab */
687 D_NEW | D_MP /* Driver compatibility flag */
688};
689
690static struct dev_ops systrace_ops = {
691 DEVO_REV, /* devo_rev, */
692 0, /* refcnt */
693 systrace_info, /* get_dev_info */
694 nulldev, /* identify */
695 nulldev, /* probe */
696 systrace_attach, /* attach */
697 systrace_detach, /* detach */
698 nodev, /* reset */
699 &systrace_cb_ops, /* driver operations */
700 NULL, /* bus operations */
701 nodev /* dev power */
702};
703
704/*
705 * Module linkage information for the kernel.
706 */
707static struct modldrv modldrv = {
708 &mod_driverops, /* module type (this is a pseudo driver) */
709 "System Call Tracing", /* name of module */
710 &systrace_ops, /* driver ops */
711};
712
713static struct modlinkage modlinkage = {
714 MODREV_1,
715 (void *)&modldrv,
716 NULL
717};
718
719int
720_init(void)
721{
722 return (mod_install(&modlinkage));
723}
724
725int
726_info(struct modinfo *modinfop)
727{
728 return (mod_info(&modlinkage, modinfop));
729}
730
731int
732_fini(void)
733{
734 return (mod_remove(&modlinkage));
735}
736#else
737typedef kern_return_t (*mach_call_t)(void *);
738
739/* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
740typedef void mach_munge_t(const void *, void *);
741
742typedef struct {
316670eb
A
743 int mach_trap_arg_count;
744 kern_return_t (*mach_trap_function)(void *);
6d2010ae 745#if 0 /* no active architectures use mungers for mach traps */
316670eb
A
746 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
747 mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */
2d21ac55 748#endif
316670eb
A
749#if MACH_ASSERT
750 const char* mach_trap_name;
6d2010ae 751#endif /* MACH_ASSERT */
2d21ac55
A
752} mach_trap_t;
753
2d21ac55
A
754extern mach_trap_t mach_trap_table[];
755extern int mach_trap_count;
756
b0d623f7 757extern const char *mach_syscall_name_table[];
2d21ac55
A
758
759/* XXX From osfmk/i386/bsd_i386.c */
760struct mach_call_args {
761 syscall_arg_t arg1;
762 syscall_arg_t arg2;
763 syscall_arg_t arg3;
764 syscall_arg_t arg4;
765 syscall_arg_t arg5;
766 syscall_arg_t arg6;
767 syscall_arg_t arg7;
768 syscall_arg_t arg8;
769 syscall_arg_t arg9;
770};
771
772#undef NSYSCALL
773#define NSYSCALL mach_trap_count
774
775#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
776#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
777#endif
778
316670eb
A
779typedef struct machtrace_sysent {
780 dtrace_id_t stsy_entry;
781 dtrace_id_t stsy_return;
782 kern_return_t (*stsy_underlying)(void *);
783 int32_t stsy_return_type;
784} machtrace_sysent_t;
2d21ac55
A
785
786static machtrace_sysent_t *machtrace_sysent = NULL;
787
788void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
789 uint64_t, uint64_t, uint64_t);
790
316670eb
A
791static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
792
2d21ac55
A
793static dev_info_t *machtrace_devi;
794static dtrace_provider_id_t machtrace_id;
795
796static kern_return_t
797dtrace_machtrace_syscall(struct mach_call_args *args)
798{
799 boolean_t flavor;
800 unsigned short code;
801
802 machtrace_sysent_t *sy;
803 dtrace_id_t id;
804 kern_return_t rval;
805#if 0 /* XXX */
806 proc_t *p;
807#endif
808 syscall_arg_t *ip = (syscall_arg_t *)args;
809 mach_call_t mach_call;
810
6d2010ae 811#if defined(__i386__) || defined (__x86_64__)
2d21ac55
A
812#pragma unused(flavor)
813 {
6d2010ae 814 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
815 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
816
817 if (is_saved_state64(tagged_regs)) {
cf7d32b8 818 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
2d21ac55
A
819 } else {
820 code = -saved_state32(tagged_regs)->eax;
821 }
822 }
2d21ac55
A
823#else
824#error Unknown Architecture
825#endif
826
827 sy = &machtrace_sysent[code];
828
316670eb
A
829 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
830 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
831
832 if (uthread)
833 uthread->t_dtrace_syscall_args = (void *)ip;
834
2d21ac55 835 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
316670eb
A
836
837 if (uthread)
838 uthread->t_dtrace_syscall_args = (void *)0;
839 }
2d21ac55
A
840
841#if 0 /* XXX */
842 /*
843 * We want to explicitly allow DTrace consumers to stop a process
844 * before it actually executes the meat of the syscall.
845 */
846 p = ttoproc(curthread);
847 mutex_enter(&p->p_lock);
848 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
849 curthread->t_dtrace_stop = 0;
850 stop(PR_REQUESTED, 0);
851 }
852 mutex_exit(&p->p_lock);
853#endif
854
855 mach_call = (mach_call_t)(*sy->stsy_underlying);
856 rval = mach_call(args);
857
858 if ((id = sy->stsy_return) != DTRACE_IDNONE)
859 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
860
861 return (rval);
862}
863
864static void
865machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
866{
867 machtrace_sysent_t *msysent = *interposed;
868 int i;
869
870 if (msysent == NULL) {
871 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
872 NSYSCALL, KM_SLEEP);
873 }
874
875 for (i = 0; i < NSYSCALL; i++) {
876 mach_trap_t *a = &actual[i];
877 machtrace_sysent_t *s = &msysent[i];
878
879 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
880 continue;
881
316670eb 882 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
2d21ac55
A
883 continue;
884
316670eb 885 s->stsy_underlying = a->mach_trap_function;
2d21ac55
A
886 }
887}
888
889/*ARGSUSED*/
890static void
891machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
892{
b0d623f7
A
893#pragma unused(arg) /* __APPLE__ */
894
2d21ac55
A
895 int i;
896
897 if (desc != NULL)
898 return;
899
900 machtrace_init(mach_trap_table, &machtrace_sysent);
901
902 for (i = 0; i < NSYSCALL; i++) {
903
904 if (machtrace_sysent[i].stsy_underlying == NULL)
905 continue;
906
907 if (dtrace_probe_lookup(machtrace_id, NULL,
b0d623f7 908 mach_syscall_name_table[i], "entry") != 0)
2d21ac55
A
909 continue;
910
b0d623f7 911 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
2d21ac55
A
912 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
913 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
b0d623f7 914 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
2d21ac55
A
915 "return", MACHTRACE_ARTIFICIAL_FRAMES,
916 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
917
918 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
919 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
920 }
921}
922
923/*ARGSUSED*/
924static void
925machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
926{
b0d623f7 927#pragma unused(arg,id) /* __APPLE__ */
2d21ac55 928 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
b0d623f7
A
929
930#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
931
932 /*
933 * There's nothing to do here but assert that we have actually been
934 * disabled.
935 */
936 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
937 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
938 } else {
939 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
940 }
941}
942
943/*ARGSUSED*/
6d2010ae 944static int
2d21ac55
A
945machtrace_enable(void *arg, dtrace_id_t id, void *parg)
946{
b0d623f7
A
947#pragma unused(arg) /* __APPLE__ */
948
2d21ac55
A
949 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
950 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
951 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
952
953 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
954 machtrace_sysent[sysnum].stsy_entry = id;
955 } else {
956 machtrace_sysent[sysnum].stsy_return = id;
957 }
958
959 if (enabled) {
316670eb 960 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
6d2010ae 961 return(0);
2d21ac55
A
962 }
963
316670eb
A
964 lck_mtx_lock(&dtrace_systrace_lock);
965
966 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
967 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
968 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
969 }
970
971 lck_mtx_unlock(&dtrace_systrace_lock);
972
6d2010ae 973 return(0);
2d21ac55
A
974}
975
976/*ARGSUSED*/
977static void
978machtrace_disable(void *arg, dtrace_id_t id, void *parg)
979{
b0d623f7
A
980#pragma unused(arg,id) /* __APPLE__ */
981
2d21ac55
A
982 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
983 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
984 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
985
986 if (disable) {
2d21ac55 987
316670eb
A
988 lck_mtx_lock(&dtrace_systrace_lock);
989
990 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
991 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
992 }
993 lck_mtx_unlock(&dtrace_systrace_lock);
2d21ac55
A
994 }
995
996 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
997 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
998 } else {
999 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
1000 }
1001}
1002
1003static dtrace_pattr_t machtrace_attr = {
1004{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
1005{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
1006{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
1007{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
1008{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
1009};
1010
1011static dtrace_pops_t machtrace_pops = {
1012 machtrace_provide,
1013 NULL,
1014 machtrace_enable,
1015 machtrace_disable,
1016 NULL,
1017 NULL,
1018 NULL,
316670eb 1019 machtrace_getarg,
2d21ac55
A
1020 NULL,
1021 machtrace_destroy
1022};
1023
1024static int
1025machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1026{
1027 switch (cmd) {
1028 case DDI_ATTACH:
1029 break;
1030 case DDI_RESUME:
1031 return (DDI_SUCCESS);
1032 default:
1033 return (DDI_FAILURE);
1034 }
1035
b0d623f7
A
1036#if !defined(__APPLE__)
1037 machtrace_probe = (void (*)())dtrace_probe;
2d21ac55
A
1038 membar_enter();
1039
1040 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1041 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1042 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1043 &machtrace_pops, NULL, &machtrace_id) != 0) {
1044 machtrace_probe = systrace_stub;
b0d623f7
A
1045#else
1046 machtrace_probe = dtrace_probe;
1047 membar_enter();
1048
1049 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1050 DDI_PSEUDO, 0) == DDI_FAILURE ||
1051 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1052 &machtrace_pops, NULL, &machtrace_id) != 0) {
1053 machtrace_probe = (void (*))&systrace_stub;
1054#endif /* __APPLE__ */
2d21ac55
A
1055 ddi_remove_minor_node(devi, NULL);
1056 return (DDI_FAILURE);
1057 }
1058
1059 ddi_report_dev(devi);
1060 machtrace_devi = devi;
1061
1062 return (DDI_SUCCESS);
1063}
1064
1065d_open_t _systrace_open;
1066
1067int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
1068{
1069#pragma unused(dev,flags,devtype,p)
1070 return 0;
1071}
1072
1073#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1074
1075/*
1076 * A struct describing which functions will get invoked for certain
1077 * actions.
1078 */
1079static struct cdevsw systrace_cdevsw =
1080{
1081 _systrace_open, /* open */
1082 eno_opcl, /* close */
1083 eno_rdwrt, /* read */
1084 eno_rdwrt, /* write */
1085 eno_ioctl, /* ioctl */
1086 (stop_fcn_t *)nulldev, /* stop */
1087 (reset_fcn_t *)nulldev, /* reset */
1088 NULL, /* tty's */
1089 eno_select, /* select */
1090 eno_mmap, /* mmap */
1091 eno_strat, /* strategy */
1092 eno_getc, /* getc */
1093 eno_putc, /* putc */
1094 0 /* type */
1095};
1096
1097static int gSysTraceInited = 0;
1098
1099void systrace_init( void );
1100
1101void systrace_init( void )
1102{
1103 if (0 == gSysTraceInited) {
1104 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1105
1106 if (majdevno < 0) {
1107 printf("systrace_init: failed to allocate a major number!\n");
1108 gSysTraceInited = 0;
1109 return;
1110 }
1111
b0d623f7
A
1112 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1113 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
2d21ac55
A
1114
1115 gSysTraceInited = 1;
1116 } else
1117 panic("systrace_init: called twice!\n");
1118}
1119#undef SYSTRACE_MAJOR
1120#endif /* __APPLE__ */
316670eb
A
1121
1122static uint64_t
1123systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1124{
1125#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1126 uint64_t val = 0;
1127 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1128
1129 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1130
1131 if (uthread)
1132 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1133
1134 if (!stack)
1135 return(0);
1136
1137 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1138 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1139 val = (uint64_t)*(stack+argno);
1140 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1141 return (val);
1142}
1143
1144
1145static uint64_t
1146machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1147{
1148#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1149 uint64_t val = 0;
1150 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1151
1152 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1153
1154 if (uthread)
1155 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1156
1157 if (!stack)
1158 return(0);
1159
1160 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1161 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1162 val = (uint64_t)*(stack+argno);
1163 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1164 return (val);
1165}
1166