]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/dtrace/systrace.c
xnu-1699.32.7.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
6d2010ae 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2d21ac55
A
23 * Use is subject to license terms.
24 */
25
b0d623f7 26/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
2d21ac55
A
27
28#if !defined(__APPLE__)
29#include <sys/dtrace.h>
30#include <sys/systrace.h>
31#include <sys/stat.h>
32#include <sys/systm.h>
33#include <sys/conf.h>
34#include <sys/ddi.h>
35#include <sys/sunddi.h>
36#include <sys/atomic.h>
37#define SYSTRACE_ARTIFICIAL_FRAMES 1
38#else
39
40#ifdef KERNEL
41#ifndef _KERNEL
42#define _KERNEL /* Solaris vs. Darwin */
43#endif
44#endif
45
46#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47#include <kern/thread.h>
48#include <mach/thread_status.h>
49/* XXX All of these should really be derived from syscall_sw.h */
50#if defined(__i386__) || defined (__x86_64__)
51#define SYSCALL_CLASS_SHIFT 24
52#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
53#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
54#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
55
56typedef x86_saved_state_t savearea_t;
2d21ac55
A
57#endif
58
59#include <sys/param.h>
60#include <sys/systm.h>
61#include <sys/proc.h>
62#include <sys/errno.h>
63#include <sys/ioctl.h>
64#include <sys/conf.h>
65#include <sys/fcntl.h>
66#include <miscfs/devfs/devfs.h>
67
68#include <sys/dtrace.h>
69#include <sys/dtrace_impl.h>
70#include "systrace.h"
71#include <sys/stat.h>
72#include <sys/systm.h>
73#include <sys/conf.h>
74#include <sys/user.h>
75
6d2010ae
A
76#include <machine/pal_routines.h>
77
78#if defined(__i386__) || defined (__x86_64__)
2d21ac55
A
79#define SYSTRACE_ARTIFICIAL_FRAMES 2
80#define MACHTRACE_ARTIFICIAL_FRAMES 3
2d21ac55
A
81#else
82#error Unknown Architecture
83#endif
84
85#include <sys/sysent.h>
86#define sy_callc sy_call /* Map Solaris slot name to Darwin's */
87#define NSYSCALL nsysent /* and is less than 500 or so */
88
89extern const char *syscallnames[];
90
91#include <sys/dtrace_glue.h>
92#define casptr dtrace_casptr
93#define membar_enter dtrace_membar_producer
94
95#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
96#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
97
98systrace_sysent_t *systrace_sysent = NULL;
99void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t,
b0d623f7 100 uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
2d21ac55
A
101
102void
103systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
b0d623f7 104 uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7)
2d21ac55 105{
b0d623f7 106#pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7)
2d21ac55
A
107}
108
109int32_t
110dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
111{
112 boolean_t flavor;
113 unsigned short code;
114
115 systrace_sysent_t *sy;
116 dtrace_id_t id;
117 int32_t rval;
118#if 0 /* XXX */
119 proc_t *p;
120#endif
121 syscall_arg_t *ip = (syscall_arg_t *)uap;
122
6d2010ae 123#if defined(__i386__) || defined (__x86_64__)
2d21ac55
A
124#pragma unused(flavor)
125 {
6d2010ae 126 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
127 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
128
129 if (is_saved_state64(tagged_regs)) {
130 x86_saved_state64_t *regs = saved_state64(tagged_regs);
131 code = regs->rax & SYSCALL_NUMBER_MASK;
132 /*
133 * Check for indirect system call... system call number
134 * passed as 'arg0'
135 */
136 if (code == 0) {
137 code = regs->rdi;
138 }
139 } else {
140 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
b0d623f7
A
141
142 if (code == 0) {
143 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
144 code = fuword(params);
145 }
2d21ac55
A
146 }
147 }
2d21ac55
A
148#else
149#error Unknown Architecture
150#endif
151
152 // Bounds "check" the value of code a la unix_syscall
153 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
154
c910b4d9
A
155 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
156 if (ip)
b0d623f7 157 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4), *(ip+5), *(ip+6), *(ip+7));
c910b4d9 158 else
b0d623f7 159 (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0);
c910b4d9 160 }
2d21ac55
A
161
162#if 0 /* XXX */
163 /*
164 * We want to explicitly allow DTrace consumers to stop a process
165 * before it actually executes the meat of the syscall.
166 */
167 p = ttoproc(curthread);
168 mutex_enter(&p->p_lock);
169 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
170 curthread->t_dtrace_stop = 0;
171 stop(PR_REQUESTED, 0);
172 }
173 mutex_exit(&p->p_lock);
174#endif
175
176 rval = (*sy->stsy_underlying)(pp, uap, rv);
177
178 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 179 uint64_t munged_rv0, munged_rv1;
2d21ac55
A
180 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
181
182 if (uthread)
183 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
184
185 /*
186 * "Decode" rv for use in the call to dtrace_probe()
187 */
188 if (rval == ERESTART) {
b0d623f7
A
189 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
190 munged_rv1 = -1LL;
2d21ac55
A
191 } else if (rval != EJUSTRETURN) {
192 if (rval) {
b0d623f7
A
193 munged_rv0 = -1LL; /* Mimic what libc will do. */
194 munged_rv1 = -1LL;
2d21ac55
A
195 } else {
196 switch (sy->stsy_return_type) {
197 case _SYSCALL_RET_INT_T:
b0d623f7
A
198 munged_rv0 = rv[0];
199 munged_rv1 = rv[1];
2d21ac55
A
200 break;
201 case _SYSCALL_RET_UINT_T:
b0d623f7
A
202 munged_rv0 = ((u_int)rv[0]);
203 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
204 break;
205 case _SYSCALL_RET_OFF_T:
d1ecb069 206 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
207 munged_rv0 = *(u_int64_t *)rv;
208 munged_rv1 = 0LL;
2d21ac55
A
209 break;
210 case _SYSCALL_RET_ADDR_T:
211 case _SYSCALL_RET_SIZE_T:
212 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
213 munged_rv0 = *(user_addr_t *)rv;
214 munged_rv1 = 0LL;
2d21ac55
A
215 break;
216 case _SYSCALL_RET_NONE:
b0d623f7
A
217 munged_rv0 = 0LL;
218 munged_rv1 = 0LL;
2d21ac55
A
219 break;
220 default:
b0d623f7
A
221 munged_rv0 = 0LL;
222 munged_rv1 = 0LL;
2d21ac55
A
223 break;
224 }
225 }
b0d623f7
A
226 } else {
227 munged_rv0 = 0LL;
228 munged_rv1 = 0LL;
229 }
2d21ac55 230
b0d623f7
A
231 /*
232 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
233 *
234 * "This is a bit of an historical artifact. At first, the syscall provider just
235 * had its return value in arg0, and the fbt and pid providers had their return
236 * values in arg1 (so that we could use arg0 for the offset of the return site).
237 *
238 * We inevitably started writing scripts where we wanted to see the return
239 * values from probes in all three providers, and we made this script easier
240 * to write by replicating the syscall return values in arg1 to match fbt and
241 * pid. We debated briefly about removing the return value from arg0, but
242 * decided that it would be less confusing to have the same data in two places
243 * than to have some non-helpful, non-intuitive value in arg0.
244 *
245 * This change was made 4/23/2003 according to the DTrace project's putback log."
246 */
247 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
2d21ac55
A
248 }
249
250 return (rval);
251}
252
253void
254dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
255{
256 systrace_sysent_t *sy;
257 dtrace_id_t id;
258
259 // Bounds "check" the value of code a la unix_syscall_return
260 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
261
262 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 263 uint64_t munged_rv0, munged_rv1;
2d21ac55
A
264 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
265
266 if (uthread)
267 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
268
269 /*
270 * "Decode" rv for use in the call to dtrace_probe()
271 */
272 if (rval == ERESTART) {
b0d623f7
A
273 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
274 munged_rv1 = -1LL;
2d21ac55
A
275 } else if (rval != EJUSTRETURN) {
276 if (rval) {
b0d623f7
A
277 munged_rv0 = -1LL; /* Mimic what libc will do. */
278 munged_rv1 = -1LL;
2d21ac55
A
279 } else {
280 switch (sy->stsy_return_type) {
281 case _SYSCALL_RET_INT_T:
b0d623f7
A
282 munged_rv0 = rv[0];
283 munged_rv1 = rv[1];
2d21ac55
A
284 break;
285 case _SYSCALL_RET_UINT_T:
b0d623f7
A
286 munged_rv0 = ((u_int)rv[0]);
287 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
288 break;
289 case _SYSCALL_RET_OFF_T:
d1ecb069 290 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
291 munged_rv0 = *(u_int64_t *)rv;
292 munged_rv1 = 0LL;
2d21ac55
A
293 break;
294 case _SYSCALL_RET_ADDR_T:
295 case _SYSCALL_RET_SIZE_T:
296 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
297 munged_rv0 = *(user_addr_t *)rv;
298 munged_rv1 = 0LL;
2d21ac55
A
299 break;
300 case _SYSCALL_RET_NONE:
b0d623f7
A
301 munged_rv0 = 0LL;
302 munged_rv1 = 0LL;
2d21ac55
A
303 break;
304 default:
b0d623f7
A
305 munged_rv0 = 0LL;
306 munged_rv1 = 0LL;
2d21ac55
A
307 break;
308 }
309 }
b0d623f7
A
310 } else {
311 munged_rv0 = 0LL;
312 munged_rv1 = 0LL;
313 }
2d21ac55 314
b0d623f7 315 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
2d21ac55
A
316 }
317}
318#endif /* __APPLE__ */
319
/*
 * Probe cookie encoding. The value passed as the last argument of
 * dtrace_probe_create() carries the system call number in its low
 * SYSTRACE_SHIFT bits; "entry" probes additionally have bit SYSTRACE_SHIFT
 * set, "return" probes do not.
 */
#define SYSTRACE_SHIFT			16
#define SYSTRACE_ISENTRY(x)		((int)(x) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x)		((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define SYSTRACE_ENTRY(id)		((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id)		(id)

/*
 * NOTE(review): where NSYSCALL expands to a variable name rather than a
 * constant, the preprocessor treats it as 0 and this check cannot fire --
 * confirm whether a run-time check is wanted there.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
329
330static dev_info_t *systrace_devi;
331static dtrace_provider_id_t systrace_id;
332
b0d623f7 333#if !defined (__APPLE__)
2d21ac55
A
334static void
335systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
336{
337 systrace_sysent_t *sysent = *interposed;
338 int i;
339
340 if (sysent == NULL) {
341 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
342 NSYSCALL, KM_SLEEP);
343 }
344
345 for (i = 0; i < NSYSCALL; i++) {
346 struct sysent *a = &actual[i];
347 systrace_sysent_t *s = &sysent[i];
348
349 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
350 continue;
351
352 if (a->sy_callc == dtrace_systrace_syscall)
353 continue;
354
355#ifdef _SYSCALL32_IMPL
356 if (a->sy_callc == dtrace_systrace_syscall32)
357 continue;
358#endif
359
360 s->stsy_underlying = a->sy_callc;
b0d623f7
A
361 }
362}
363#else
364#define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
365static void
366systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
367{
368
369 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
370 from bsd/sys/sysent.h */
371 int i;
372
373 if (ssysent == NULL) {
374 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
375 NSYSCALL, KM_SLEEP);
376 }
377
378 for (i = 0; i < NSYSCALL; i++) {
379 struct sysent *a = &actual[i];
380 systrace_sysent_t *s = &ssysent[i];
381
382 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
383 continue;
384
385 if (a->sy_callc == dtrace_systrace_syscall)
386 continue;
387
388#ifdef _SYSCALL32_IMPL
389 if (a->sy_callc == dtrace_systrace_syscall32)
390 continue;
2d21ac55 391#endif
b0d623f7
A
392
393 s->stsy_underlying = a->sy_callc;
394 s->stsy_return_type = a->sy_return_type;
2d21ac55
A
395 }
396}
397
b0d623f7
A
398#endif /* __APPLE__ */
399
2d21ac55
A
400/*ARGSUSED*/
401static void
402systrace_provide(void *arg, const dtrace_probedesc_t *desc)
403{
b0d623f7 404#pragma unused(arg) /* __APPLE__ */
2d21ac55
A
405 int i;
406
407 if (desc != NULL)
408 return;
409
410 systrace_init(sysent, &systrace_sysent);
411#ifdef _SYSCALL32_IMPL
412 systrace_init(sysent32, &systrace_sysent32);
413#endif
414
415 for (i = 0; i < NSYSCALL; i++) {
416 if (systrace_sysent[i].stsy_underlying == NULL)
417 continue;
418
419 if (dtrace_probe_lookup(systrace_id, NULL,
420 syscallnames[i], "entry") != 0)
421 continue;
422
423 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
424 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
425 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
426 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
427 "return", SYSTRACE_ARTIFICIAL_FRAMES,
428 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
429
430 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
431 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
432#ifdef _SYSCALL32_IMPL
433 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
434 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
435#endif
436 }
437}
438#if defined(__APPLE__)
439#undef systrace_init
440#endif
441
442/*ARGSUSED*/
443static void
444systrace_destroy(void *arg, dtrace_id_t id, void *parg)
445{
b0d623f7
A
446#pragma unused(arg,id) /* __APPLE__ */
447
2d21ac55
A
448 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
449
b0d623f7 450#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
451 /*
452 * There's nothing to do here but assert that we have actually been
453 * disabled.
454 */
455 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
456 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
457#ifdef _SYSCALL32_IMPL
458 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
459#endif
460 } else {
461 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
462#ifdef _SYSCALL32_IMPL
463 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
464#endif
465 }
466}
467
468/*ARGSUSED*/
6d2010ae 469static int
2d21ac55
A
470systrace_enable(void *arg, dtrace_id_t id, void *parg)
471{
b0d623f7
A
472#pragma unused(arg) /* __APPLE__ */
473
2d21ac55
A
474 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
475 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
476 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
477
478 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
479 systrace_sysent[sysnum].stsy_entry = id;
480#ifdef _SYSCALL32_IMPL
481 systrace_sysent32[sysnum].stsy_entry = id;
482#endif
483 } else {
484 systrace_sysent[sysnum].stsy_return = id;
485#ifdef _SYSCALL32_IMPL
486 systrace_sysent32[sysnum].stsy_return = id;
487#endif
488 }
489
490 if (enabled) {
491 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
6d2010ae 492 return(0);
2d21ac55
A
493 }
494
495 (void) casptr(&sysent[sysnum].sy_callc,
496 (void *)systrace_sysent[sysnum].stsy_underlying,
497 (void *)dtrace_systrace_syscall);
498#ifdef _SYSCALL32_IMPL
499 (void) casptr(&sysent32[sysnum].sy_callc,
500 (void *)systrace_sysent32[sysnum].stsy_underlying,
501 (void *)dtrace_systrace_syscall32);
502#endif
6d2010ae 503 return (0);
2d21ac55
A
504}
505
506/*ARGSUSED*/
507static void
508systrace_disable(void *arg, dtrace_id_t id, void *parg)
509{
b0d623f7
A
510#pragma unused(arg,id) /* __APPLE__ */
511
2d21ac55
A
512 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
513 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
514 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
515
516 if (disable) {
517 (void) casptr(&sysent[sysnum].sy_callc,
518 (void *)dtrace_systrace_syscall,
519 (void *)systrace_sysent[sysnum].stsy_underlying);
520
521#ifdef _SYSCALL32_IMPL
522 (void) casptr(&sysent32[sysnum].sy_callc,
523 (void *)dtrace_systrace_syscall32,
524 (void *)systrace_sysent32[sysnum].stsy_underlying);
525#endif
526 }
527
528 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
529 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
530#ifdef _SYSCALL32_IMPL
531 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
532#endif
533 } else {
534 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
535#ifdef _SYSCALL32_IMPL
536 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
537#endif
538 }
539}
540
541static dtrace_pattr_t systrace_attr = {
542{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
543{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
544{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
545{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
546{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
547};
548
549static dtrace_pops_t systrace_pops = {
550 systrace_provide,
551 NULL,
552 systrace_enable,
553 systrace_disable,
554 NULL,
555 NULL,
556 NULL,
557 NULL,
558 NULL,
559 systrace_destroy
560};
561
562static int
563systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
564{
565 switch (cmd) {
566 case DDI_ATTACH:
567 break;
568 case DDI_RESUME:
569 return (DDI_SUCCESS);
570 default:
571 return (DDI_FAILURE);
572 }
573
b0d623f7
A
574#if !defined(__APPLE__)
575 systrace_probe = (void (*)())dtrace_probe;
2d21ac55
A
576 membar_enter();
577
578 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
579 DDI_PSEUDO, NULL) == DDI_FAILURE ||
580 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
581 &systrace_pops, NULL, &systrace_id) != 0) {
582 systrace_probe = systrace_stub;
583 ddi_remove_minor_node(devi, NULL);
584 return (DDI_FAILURE);
585 }
b0d623f7
A
586#else
587 systrace_probe = (void(*))&dtrace_probe;
588 membar_enter();
589
590 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
591 DDI_PSEUDO, 0) == DDI_FAILURE ||
592 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
593 &systrace_pops, NULL, &systrace_id) != 0) {
594 systrace_probe = systrace_stub;
595 ddi_remove_minor_node(devi, NULL);
596 return (DDI_FAILURE);
597 }
598#endif /* __APPLE__ */
2d21ac55
A
599
600 ddi_report_dev(devi);
601 systrace_devi = devi;
602
603 return (DDI_SUCCESS);
604}
605
606#if !defined(__APPLE__)
607static int
608systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
609{
610 switch (cmd) {
611 case DDI_DETACH:
612 break;
613 case DDI_SUSPEND:
614 return (DDI_SUCCESS);
615 default:
616 return (DDI_FAILURE);
617 }
618
619 if (dtrace_unregister(systrace_id) != 0)
620 return (DDI_FAILURE);
621
622 ddi_remove_minor_node(devi, NULL);
623 systrace_probe = systrace_stub;
624 return (DDI_SUCCESS);
625}
626
627/*ARGSUSED*/
628static int
629systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
630{
631 int error;
632
633 switch (infocmd) {
634 case DDI_INFO_DEVT2DEVINFO:
635 *result = (void *)systrace_devi;
636 error = DDI_SUCCESS;
637 break;
638 case DDI_INFO_DEVT2INSTANCE:
639 *result = (void *)0;
640 error = DDI_SUCCESS;
641 break;
642 default:
643 error = DDI_FAILURE;
644 }
645 return (error);
646}
647
648/*ARGSUSED*/
649static int
650systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
651{
652 return (0);
653}
654
655static struct cb_ops systrace_cb_ops = {
656 systrace_open, /* open */
657 nodev, /* close */
658 nulldev, /* strategy */
659 nulldev, /* print */
660 nodev, /* dump */
661 nodev, /* read */
662 nodev, /* write */
663 nodev, /* ioctl */
664 nodev, /* devmap */
665 nodev, /* mmap */
666 nodev, /* segmap */
667 nochpoll, /* poll */
668 ddi_prop_op, /* cb_prop_op */
669 0, /* streamtab */
670 D_NEW | D_MP /* Driver compatibility flag */
671};
672
673static struct dev_ops systrace_ops = {
674 DEVO_REV, /* devo_rev, */
675 0, /* refcnt */
676 systrace_info, /* get_dev_info */
677 nulldev, /* identify */
678 nulldev, /* probe */
679 systrace_attach, /* attach */
680 systrace_detach, /* detach */
681 nodev, /* reset */
682 &systrace_cb_ops, /* driver operations */
683 NULL, /* bus operations */
684 nodev /* dev power */
685};
686
687/*
688 * Module linkage information for the kernel.
689 */
690static struct modldrv modldrv = {
691 &mod_driverops, /* module type (this is a pseudo driver) */
692 "System Call Tracing", /* name of module */
693 &systrace_ops, /* driver ops */
694};
695
696static struct modlinkage modlinkage = {
697 MODREV_1,
698 (void *)&modldrv,
699 NULL
700};
701
702int
703_init(void)
704{
705 return (mod_install(&modlinkage));
706}
707
708int
709_info(struct modinfo *modinfop)
710{
711 return (mod_info(&modlinkage, modinfop));
712}
713
714int
715_fini(void)
716{
717 return (mod_remove(&modlinkage));
718}
719#else
720typedef kern_return_t (*mach_call_t)(void *);
721
722/* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
723typedef void mach_munge_t(const void *, void *);
724
/*
 * Local declaration of one Mach trap table entry.
 * NOTE(review): this is presumably a copy of the definition in
 * <kern/syscall_sw.h> and must stay layout-compatible with it -- confirm
 * whenever that header changes.
 */
typedef struct {
	int		mach_trap_arg_count;		/* number of arguments */
	int		(*mach_trap_function)(void);	/* handler; patched by machtrace_enable/disable */
#if 0 /* no active architectures use mungers for mach traps */
	mach_munge_t	*mach_trap_arg_munge32;		/* system call arguments for 32-bit */
	mach_munge_t	*mach_trap_arg_munge64;		/* system call arguments for 64-bit */
#endif
#if MACH_ASSERT
	const char*	mach_trap_name;
#endif /* MACH_ASSERT */
} mach_trap_t;
736
2d21ac55
A
737extern mach_trap_t mach_trap_table[];
738extern int mach_trap_count;
739
b0d623f7 740extern const char *mach_syscall_name_table[];
2d21ac55
A
741
742/* XXX From osfmk/i386/bsd_i386.c */
743struct mach_call_args {
744 syscall_arg_t arg1;
745 syscall_arg_t arg2;
746 syscall_arg_t arg3;
747 syscall_arg_t arg4;
748 syscall_arg_t arg5;
749 syscall_arg_t arg6;
750 syscall_arg_t arg7;
751 syscall_arg_t arg8;
752 syscall_arg_t arg9;
753};
754
755#undef NSYSCALL
756#define NSYSCALL mach_trap_count
757
758#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
759#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
760#endif
761
762typedef systrace_sysent_t machtrace_sysent_t;
763
764static machtrace_sysent_t *machtrace_sysent = NULL;
765
766void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
767 uint64_t, uint64_t, uint64_t);
768
769static dev_info_t *machtrace_devi;
770static dtrace_provider_id_t machtrace_id;
771
772static kern_return_t
773dtrace_machtrace_syscall(struct mach_call_args *args)
774{
775 boolean_t flavor;
776 unsigned short code;
777
778 machtrace_sysent_t *sy;
779 dtrace_id_t id;
780 kern_return_t rval;
781#if 0 /* XXX */
782 proc_t *p;
783#endif
784 syscall_arg_t *ip = (syscall_arg_t *)args;
785 mach_call_t mach_call;
786
6d2010ae 787#if defined(__i386__) || defined (__x86_64__)
2d21ac55
A
788#pragma unused(flavor)
789 {
6d2010ae 790 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
791 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
792
793 if (is_saved_state64(tagged_regs)) {
cf7d32b8 794 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
2d21ac55
A
795 } else {
796 code = -saved_state32(tagged_regs)->eax;
797 }
798 }
2d21ac55
A
799#else
800#error Unknown Architecture
801#endif
802
803 sy = &machtrace_sysent[code];
804
805 if ((id = sy->stsy_entry) != DTRACE_IDNONE)
806 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
807
808#if 0 /* XXX */
809 /*
810 * We want to explicitly allow DTrace consumers to stop a process
811 * before it actually executes the meat of the syscall.
812 */
813 p = ttoproc(curthread);
814 mutex_enter(&p->p_lock);
815 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
816 curthread->t_dtrace_stop = 0;
817 stop(PR_REQUESTED, 0);
818 }
819 mutex_exit(&p->p_lock);
820#endif
821
822 mach_call = (mach_call_t)(*sy->stsy_underlying);
823 rval = mach_call(args);
824
825 if ((id = sy->stsy_return) != DTRACE_IDNONE)
826 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
827
828 return (rval);
829}
830
831static void
832machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
833{
834 machtrace_sysent_t *msysent = *interposed;
835 int i;
836
837 if (msysent == NULL) {
838 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
839 NSYSCALL, KM_SLEEP);
840 }
841
842 for (i = 0; i < NSYSCALL; i++) {
843 mach_trap_t *a = &actual[i];
844 machtrace_sysent_t *s = &msysent[i];
845
846 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
847 continue;
848
849 if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall))
850 continue;
851
b0d623f7 852 s->stsy_underlying = (sy_call_t *)a->mach_trap_function;
2d21ac55
A
853 }
854}
855
856/*ARGSUSED*/
857static void
858machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
859{
b0d623f7
A
860#pragma unused(arg) /* __APPLE__ */
861
2d21ac55
A
862 int i;
863
864 if (desc != NULL)
865 return;
866
867 machtrace_init(mach_trap_table, &machtrace_sysent);
868
869 for (i = 0; i < NSYSCALL; i++) {
870
871 if (machtrace_sysent[i].stsy_underlying == NULL)
872 continue;
873
874 if (dtrace_probe_lookup(machtrace_id, NULL,
b0d623f7 875 mach_syscall_name_table[i], "entry") != 0)
2d21ac55
A
876 continue;
877
b0d623f7 878 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
2d21ac55
A
879 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
880 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
b0d623f7 881 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
2d21ac55
A
882 "return", MACHTRACE_ARTIFICIAL_FRAMES,
883 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
884
885 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
886 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
887 }
888}
889
890/*ARGSUSED*/
891static void
892machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
893{
b0d623f7 894#pragma unused(arg,id) /* __APPLE__ */
2d21ac55 895 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
b0d623f7
A
896
897#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
898
899 /*
900 * There's nothing to do here but assert that we have actually been
901 * disabled.
902 */
903 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
904 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
905 } else {
906 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
907 }
908}
909
910/*ARGSUSED*/
6d2010ae 911static int
2d21ac55
A
912machtrace_enable(void *arg, dtrace_id_t id, void *parg)
913{
b0d623f7
A
914#pragma unused(arg) /* __APPLE__ */
915
2d21ac55
A
916 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
917 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
918 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
919
920 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
921 machtrace_sysent[sysnum].stsy_entry = id;
922 } else {
923 machtrace_sysent[sysnum].stsy_return = id;
924 }
925
926 if (enabled) {
b0d623f7 927 ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall);
6d2010ae 928 return(0);
2d21ac55
A
929 }
930
931 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
932 (void *)machtrace_sysent[sysnum].stsy_underlying,
933 (void *)dtrace_machtrace_syscall);
6d2010ae 934 return(0);
2d21ac55
A
935}
936
937/*ARGSUSED*/
938static void
939machtrace_disable(void *arg, dtrace_id_t id, void *parg)
940{
b0d623f7
A
941#pragma unused(arg,id) /* __APPLE__ */
942
2d21ac55
A
943 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
944 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
945 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
946
947 if (disable) {
948 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
949 (void *)dtrace_machtrace_syscall,
950 (void *)machtrace_sysent[sysnum].stsy_underlying);
951
952 }
953
954 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
955 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
956 } else {
957 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
958 }
959}
960
961static dtrace_pattr_t machtrace_attr = {
962{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
963{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
964{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
965{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
966{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
967};
968
969static dtrace_pops_t machtrace_pops = {
970 machtrace_provide,
971 NULL,
972 machtrace_enable,
973 machtrace_disable,
974 NULL,
975 NULL,
976 NULL,
977 NULL,
978 NULL,
979 machtrace_destroy
980};
981
982static int
983machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
984{
985 switch (cmd) {
986 case DDI_ATTACH:
987 break;
988 case DDI_RESUME:
989 return (DDI_SUCCESS);
990 default:
991 return (DDI_FAILURE);
992 }
993
b0d623f7
A
994#if !defined(__APPLE__)
995 machtrace_probe = (void (*)())dtrace_probe;
2d21ac55
A
996 membar_enter();
997
998 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
999 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1000 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1001 &machtrace_pops, NULL, &machtrace_id) != 0) {
1002 machtrace_probe = systrace_stub;
b0d623f7
A
1003#else
1004 machtrace_probe = dtrace_probe;
1005 membar_enter();
1006
1007 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1008 DDI_PSEUDO, 0) == DDI_FAILURE ||
1009 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1010 &machtrace_pops, NULL, &machtrace_id) != 0) {
1011 machtrace_probe = (void (*))&systrace_stub;
1012#endif /* __APPLE__ */
2d21ac55
A
1013 ddi_remove_minor_node(devi, NULL);
1014 return (DDI_FAILURE);
1015 }
1016
1017 ddi_report_dev(devi);
1018 machtrace_devi = devi;
1019
1020 return (DDI_SUCCESS);
1021}
1022
1023d_open_t _systrace_open;
1024
/* Device open handler: accepts every open request and ignores its arguments. */
int
_systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	const int ok = 0;

	return ok;
}
1030
1031#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1032
1033/*
1034 * A struct describing which functions will get invoked for certain
1035 * actions.
1036 */
1037static struct cdevsw systrace_cdevsw =
1038{
1039 _systrace_open, /* open */
1040 eno_opcl, /* close */
1041 eno_rdwrt, /* read */
1042 eno_rdwrt, /* write */
1043 eno_ioctl, /* ioctl */
1044 (stop_fcn_t *)nulldev, /* stop */
1045 (reset_fcn_t *)nulldev, /* reset */
1046 NULL, /* tty's */
1047 eno_select, /* select */
1048 eno_mmap, /* mmap */
1049 eno_strat, /* strategy */
1050 eno_getc, /* getc */
1051 eno_putc, /* putc */
1052 0 /* type */
1053};
1054
1055static int gSysTraceInited = 0;
1056
1057void systrace_init( void );
1058
1059void systrace_init( void )
1060{
1061 if (0 == gSysTraceInited) {
1062 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1063
1064 if (majdevno < 0) {
1065 printf("systrace_init: failed to allocate a major number!\n");
1066 gSysTraceInited = 0;
1067 return;
1068 }
1069
b0d623f7
A
1070 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1071 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
2d21ac55
A
1072
1073 gSysTraceInited = 1;
1074 } else
1075 panic("systrace_init: called twice!\n");
1076}
1077#undef SYSTRACE_MAJOR
1078#endif /* __APPLE__ */